diff -Nru mesa-13.0.2/configure mesa-13.0.3/configure --- mesa-13.0.2/configure 2016-11-28 15:16:01.000000000 +0000 +++ mesa-13.0.3/configure 2017-01-05 15:52:21.000000000 +0000 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for Mesa 13.0.2. +# Generated by GNU Autoconf 2.69 for Mesa 13.0.3. # # Report bugs to . # @@ -591,8 +591,8 @@ # Identity of this package. PACKAGE_NAME='Mesa' PACKAGE_TARNAME='mesa' -PACKAGE_VERSION='13.0.2' -PACKAGE_STRING='Mesa 13.0.2' +PACKAGE_VERSION='13.0.3' +PACKAGE_STRING='Mesa 13.0.3' PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa' PACKAGE_URL='' @@ -1819,7 +1819,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures Mesa 13.0.2 to adapt to many kinds of systems. +\`configure' configures Mesa 13.0.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1890,7 +1890,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Mesa 13.0.2:";; + short | recursive ) echo "Configuration of Mesa 13.0.3:";; esac cat <<\_ACEOF @@ -2226,7 +2226,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Mesa configure 13.0.2 +Mesa configure 13.0.3 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2945,7 +2945,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Mesa $as_me 13.0.2, which was +It was created by Mesa $as_me 13.0.3, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3920,7 +3920,7 @@ # Define the identity of the package. PACKAGE='mesa' - VERSION='13.0.2' + VERSION='13.0.3' cat >>confdefs.h <<_ACEOF @@ -29350,7 +29350,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Mesa $as_me 13.0.2, which was +This file was extended by Mesa $as_me 13.0.3, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -29407,7 +29407,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -Mesa config.status 13.0.2 +Mesa config.status 13.0.3 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff -Nru mesa-13.0.2/debian/changelog mesa-13.0.3/debian/changelog --- mesa-13.0.2/debian/changelog 2017-01-14 01:32:35.000000000 +0000 +++ mesa-13.0.3/debian/changelog 2017-01-14 01:32:36.000000000 +0000 @@ -1,3 +1,60 @@ +mesa (13.0.3-1ubuntu2) zesty; urgency=medium + + * rules: Drop forcing -O2 to fix ppc64el ftbfs, gcc got fixed already. + (LP: #1605796) + * control: Fix dependencies for ubuntuBSD. (LP: #1565080) + * compat,rules: Don't bump compat to 10, breaks applying patches + before autoreconf which we need for the Mir EGL platform. + + -- Timo Aaltonen Thu, 12 Jan 2017 09:49:20 +0200 + +mesa (13.0.3-1ubuntu1) zesty; urgency=medium + + * Merge from Debian. + * intel: Add patches to support 16k textures. (LP: #1655556) + + -- Timo Aaltonen Wed, 11 Jan 2017 09:45:19 +0200 + +mesa (13.0.3-1) unstable; urgency=medium + + [ Timo Aaltonen ] + * New upstream release. + + [ Andreas Boll ] + * Add vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff: Fixes mpeg2 + decoding on radeon GPUs without native mpeg2 support (Closes: + #846297). Thanks to Jörg-Volker Peetz for reporting and tracking + down this regression! + * Add r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff: Fixes + GPU hangs on r600g (Evergreen and Northern Islands GPUs) in many + games. + + -- Timo Aaltonen Tue, 10 Jan 2017 17:38:23 +0200 + +mesa (13.0.2-3) unstable; urgency=medium + + * Fix typo to correctly disable asm on x32. Should fix FTBFS on x32. + * Add debian/source/format file. + + -- Andreas Boll Thu, 15 Dec 2016 16:42:25 +0100 + +mesa (13.0.2-2) unstable; urgency=medium + + * Bump llvm/libclang build-deps to >= 1:3.9.1. + - Enables support for OpenCL on AMD Polaris GPUs (Closes: #848173). + * Cherry-pick 6dc96de (cso: don't release sampler states that are + bound) from upstream master branch. Fixes random GPU hangs on + radeonsi (AMD Southern Islands and newer) in many games. + * Bump debhelper compat to 10. + * Stop passing --disable-silent-rules to configure, debhelper does + that for a while. + * Drop no longer needed dpkg-dev versioned build-dependency. + * Drop redundant cross-build detection, debhelper does this for us + already. + * Disable assembly usage on x32. Related to Bug #758094. + + -- Andreas Boll Thu, 15 Dec 2016 15:16:56 +0100 + mesa (13.0.2-1ubuntu1) zesty; urgency=medium * Merge from Debian. diff -Nru mesa-13.0.2/debian/control mesa-13.0.3/debian/control --- mesa-13.0.2/debian/control 2017-01-14 01:32:35.000000000 +0000 +++ mesa-13.0.3/debian/control 2017-01-14 01:32:36.000000000 +0000 @@ -6,8 +6,7 @@ Uploaders: Andreas Boll Standards-Version: 3.9.8 Build-Depends: - debhelper (>= 9), - dh-autoreconf, + debhelper (>= 10), quilt (>= 0.40), pkg-config, libdrm-dev (>= 2.4.69) [!hurd-any], @@ -15,7 +14,6 @@ x11proto-gl-dev (>= 1.4.14), libxxf86vm-dev, libexpat1-dev, - dpkg-dev (>= 1.16.1), libgcrypt20-dev [amd64 i386 arm64 armhf ppc64el x32], libsensors4-dev [!hurd-any], libxfixes-dev, @@ -40,16 +38,16 @@ libxcb-randr0-dev, libxcb-sync-dev, libxshmfence-dev (>= 1.1), - libmirclient-dev, - mir-client-platform-mesa-dev, + libmirclient-dev [linux-any], + mir-client-platform-mesa-dev [linux-any], python, python-mako, flex, bison, - llvm-3.9-dev (>= 1:3.9) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf ppc64el], + llvm-3.9-dev (>= 1:3.9.1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf ppc64el], libelf-dev [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf ppc64el], libwayland-dev (>= 1.2.0) [linux-any], - libclang-3.9-dev (>= 1:3.9) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf], + libclang-3.9-dev (>= 1:3.9.1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf], libclc-dev (>= 0.2.0+git20160907) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf], Vcs-Git: https://anonscm.debian.org/git/pkg-xorg/lib/mesa.git Vcs-Browser: https://anonscm.debian.org/cgit/pkg-xorg/lib/mesa.git @@ -157,7 +155,7 @@ libxcb-sync-dev, libxshmfence-dev, libx11-xcb-dev, - libmirclient-dev [!arm64 !powerpc !ppc64 !ppc64el], + libmirclient-dev [!arm64 !powerpc !ppc64 !ppc64el !kfreebsd-any !hurd-any], libwayland-dev (>= 1.2.0) [linux-any], ${misc:Depends}, Multi-Arch: same diff -Nru mesa-13.0.2/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch mesa-13.0.3/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch --- mesa-13.0.2/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,137 @@ +From c64248223dd74ac67eb9033d39d51fc864f48595 Mon Sep 17 00:00:00 2001 +From: Jordan Justen +Date: Mon, 7 Nov 2016 14:06:49 -0800 +Subject: [PATCH 1/8] intel/blorp_blit: Create structure for src & dst + coordinates + +This will be useful for splitting blits into smaller sizes. + +We also make the coordinates of type double rather than float. Since +we will be splitting and scaling the coordinates, we might require +extra precision in the calculations. + +v2: + * Use double instead of float. (Jason) + +Signed-off-by: Jordan Justen +Reviewed-by: Jason Ekstrand +(cherry picked from commit b74d4f6ca02715470d8f7726d19aff342873dbc6) +--- + src/intel/blorp/blorp_blit.c | 75 +++++++++++++++++++++++++++++++++----------- + 1 file changed, 56 insertions(+), 19 deletions(-) + +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index 018d997..e5232d4 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -1433,15 +1433,20 @@ surf_retile_w_to_y(const struct isl_device *isl_dev, + info->tile_y_sa /= 2; + } + ++struct blt_axis { ++ double src0, src1, dst0, dst1; ++ bool mirror; ++}; ++ ++struct blt_coords { ++ struct blt_axis x, y; ++}; ++ + static void + do_blorp_blit(struct blorp_batch *batch, + struct blorp_params *params, + struct brw_blorp_blit_prog_key *wm_prog_key, +- float src_x0, float src_y0, +- float src_x1, float src_y1, +- float dst_x0, float dst_y0, +- float dst_x1, float dst_y1, +- bool mirror_x, bool mirror_y) ++ const struct blt_coords *coords) + { + const struct gen_device_info *devinfo = batch->blorp->isl_dev->info; + +@@ -1468,15 +1473,19 @@ do_blorp_blit(struct blorp_batch *batch, + /* Round floating point values to nearest integer to avoid "off by one texel" + * kind of errors when blitting. + */ +- params->x0 = params->wm_inputs.discard_rect.x0 = roundf(dst_x0); +- params->y0 = params->wm_inputs.discard_rect.y0 = roundf(dst_y0); +- params->x1 = params->wm_inputs.discard_rect.x1 = roundf(dst_x1); +- params->y1 = params->wm_inputs.discard_rect.y1 = roundf(dst_y1); ++ params->x0 = params->wm_inputs.discard_rect.x0 = round(coords->x.dst0); ++ params->y0 = params->wm_inputs.discard_rect.y0 = round(coords->y.dst0); ++ params->x1 = params->wm_inputs.discard_rect.x1 = round(coords->x.dst1); ++ params->y1 = params->wm_inputs.discard_rect.y1 = round(coords->y.dst1); + + brw_blorp_setup_coord_transform(¶ms->wm_inputs.coord_transform[0], +- src_x0, src_x1, dst_x0, dst_x1, mirror_x); ++ coords->x.src0, coords->x.src1, ++ coords->x.dst0, coords->x.dst1, ++ coords->x.mirror); + brw_blorp_setup_coord_transform(¶ms->wm_inputs.coord_transform[1], +- src_y0, src_y1, dst_y0, dst_y1, mirror_y); ++ coords->y.src0, coords->y.src1, ++ coords->y.dst0, coords->y.dst1, ++ coords->y.mirror); + + if (devinfo->gen > 6 && + params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { +@@ -1710,10 +1719,24 @@ blorp_blit(struct blorp_batch *batch, + minify(params.src.surf.logical_level0_px.height, src_level) * + wm_prog_key.y_scale - 1.0f; + +- do_blorp_blit(batch, ¶ms, &wm_prog_key, +- src_x0, src_y0, src_x1, src_y1, +- dst_x0, dst_y0, dst_x1, dst_y1, +- mirror_x, mirror_y); ++ struct blt_coords coords = { ++ .x = { ++ .src0 = src_x0, ++ .src1 = src_x1, ++ .dst0 = dst_x0, ++ .dst1 = dst_x1, ++ .mirror = mirror_x ++ }, ++ .y = { ++ .src0 = src_y0, ++ .src1 = src_y1, ++ .dst0 = dst_y0, ++ .dst1 = dst_y1, ++ .mirror = mirror_y ++ } ++ }; ++ ++ do_blorp_blit(batch, ¶ms, &wm_prog_key, &coords); + } + + static enum isl_format +@@ -1891,8 +1914,22 @@ blorp_copy(struct blorp_batch *batch, + wm_prog_key.need_dst_offset = true; + } + +- do_blorp_blit(batch, ¶ms, &wm_prog_key, +- src_x, src_y, src_x + src_width, src_y + src_height, +- dst_x, dst_y, dst_x + dst_width, dst_y + dst_height, +- false, false); ++ struct blt_coords coords = { ++ .x = { ++ .src0 = src_x, ++ .src1 = src_x + src_width, ++ .dst0 = dst_x, ++ .dst1 = dst_x + dst_width, ++ .mirror = false ++ }, ++ .y = { ++ .src0 = src_y, ++ .src1 = src_y + src_height, ++ .dst0 = dst_y, ++ .dst1 = dst_y + dst_height, ++ .mirror = false ++ } ++ }; ++ ++ do_blorp_blit(batch, ¶ms, &wm_prog_key, &coords); + } +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch mesa-13.0.3/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch --- mesa-13.0.2/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,151 @@ +From bae700b49da2752b10cf47ee91685e8f848e0b3a Mon Sep 17 00:00:00 2001 +From: Jordan Justen +Date: Mon, 7 Nov 2016 14:06:56 -0800 +Subject: [PATCH 2/8] intel/blorp_blit: Split blorp blits if they are too large + +We rename do_blorp_blit() to try_blorp_blit(), and add a return error +if the surface size for the blit is too large. Now, do_blorp_blit() is +rewritten to try to split the blit into smaller operations if +try_blorp_blit() fails. + +Note: In this commit, try_blorp_blit() will always attempt to blit and +never return an error, which matches the previous behavior. We will +enable the size checking and splitting in a future commit. + +The motivation for this splitting is that in some cases when we +flatten an image, it's dimensions grow, and this can then exceed the +programmable hardware limits. An example is w-tiled+MSAA blits. + +v2: + * Use double instead of float. (Jason) + +Signed-off-by: Jordan Justen +Reviewed-by: Jason Ekstrand +(cherry picked from commit 12e0a6e25967e097f9d18e9ee25b30248f617b28) +--- + src/intel/blorp/blorp_blit.c | 102 ++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 96 insertions(+), 6 deletions(-) + +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index e5232d4..8db8d5e 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -1442,11 +1442,21 @@ struct blt_coords { + struct blt_axis x, y; + }; + +-static void +-do_blorp_blit(struct blorp_batch *batch, +- struct blorp_params *params, +- struct brw_blorp_blit_prog_key *wm_prog_key, +- const struct blt_coords *coords) ++enum blit_shrink_status { ++ BLIT_NO_SHRINK = 0, ++ BLIT_WIDTH_SHRINK = 1, ++ BLIT_HEIGHT_SHRINK = 2, ++}; ++ ++/* Try to blit. If the surface parameters exceed the size allowed by hardware, ++ * then enum blit_shrink_status will be returned. If BLIT_NO_SHRINK is ++ * returned, then the blit was successful. ++ */ ++static enum blit_shrink_status ++try_blorp_blit(struct blorp_batch *batch, ++ struct blorp_params *params, ++ struct brw_blorp_blit_prog_key *wm_prog_key, ++ const struct blt_coords *coords) + { + const struct gen_device_info *devinfo = batch->blorp->isl_dev->info; + +@@ -1647,7 +1657,87 @@ do_blorp_blit(struct blorp_batch *batch, + + brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key); + +- batch->blorp->exec(batch, params); ++ unsigned result = 0; ++ ++ if (result == 0) { ++ batch->blorp->exec(batch, params); ++ } ++ ++ return result; ++} ++ ++/* Adjust split blit source coordinates for the current destination ++ * coordinates. ++ */ ++static void ++adjust_split_source_coords(const struct blt_axis *orig, ++ struct blt_axis *split_coords, ++ double scale) ++{ ++ /* When scale is greater than 0, then we are growing from the start, so ++ * src0 uses delta0, and src1 uses delta1. When scale is less than 0, the ++ * source range shrinks from the end. In that case src0 is adjusted by ++ * delta1, and src1 is adjusted by delta0. ++ */ ++ double delta0 = scale * (split_coords->dst0 - orig->dst0); ++ double delta1 = scale * (split_coords->dst1 - orig->dst1); ++ split_coords->src0 = orig->src0 + (scale >= 0.0 ? delta0 : delta1); ++ split_coords->src1 = orig->src1 + (scale >= 0.0 ? delta1 : delta0); ++} ++ ++static void ++do_blorp_blit(struct blorp_batch *batch, ++ struct blorp_params *params, ++ struct brw_blorp_blit_prog_key *wm_prog_key, ++ const struct blt_coords *orig) ++{ ++ struct blt_coords split_coords = *orig; ++ double w = orig->x.dst1 - orig->x.dst0; ++ double h = orig->y.dst1 - orig->y.dst0; ++ double x_scale = (orig->x.src1 - orig->x.src0) / w; ++ double y_scale = (orig->y.src1 - orig->y.src0) / h; ++ if (orig->x.mirror) ++ x_scale = -x_scale; ++ if (orig->y.mirror) ++ y_scale = -y_scale; ++ ++ bool x_done, y_done; ++ do { ++ enum blit_shrink_status result = ++ try_blorp_blit(batch, params, wm_prog_key, &split_coords); ++ ++ if (result & BLIT_WIDTH_SHRINK) { ++ w /= 2.0; ++ assert(w >= 1.0); ++ split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1); ++ adjust_split_source_coords(&orig->x, &split_coords.x, x_scale); ++ } ++ if (result & BLIT_HEIGHT_SHRINK) { ++ h /= 2.0; ++ assert(h >= 1.0); ++ split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1); ++ adjust_split_source_coords(&orig->y, &split_coords.y, y_scale); ++ } ++ ++ if (result != 0) ++ continue; ++ ++ y_done = (orig->y.dst1 - split_coords.y.dst1 < 0.5); ++ x_done = y_done && (orig->x.dst1 - split_coords.x.dst1 < 0.5); ++ if (x_done) { ++ break; ++ } else if (y_done) { ++ split_coords.x.dst0 += w; ++ split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1); ++ split_coords.y.dst0 = orig->y.dst0; ++ split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1); ++ adjust_split_source_coords(&orig->x, &split_coords.x, x_scale); ++ } else { ++ split_coords.y.dst0 += h; ++ split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1); ++ adjust_split_source_coords(&orig->y, &split_coords.y, y_scale); ++ } ++ } while (true); + } + + void +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch mesa-13.0.3/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch --- mesa-13.0.2/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,160 @@ +From e95729a4cbaaf33f093ff241c58dc584493f5a87 Mon Sep 17 00:00:00 2001 +From: Jordan Justen +Date: Mon, 7 Nov 2016 14:08:22 -0800 +Subject: [PATCH 3/8] intel/blorp_blit: Adjust blorp surface parameters for + split blits + +If try_blorp_blit() previously returned that a blit was too large, +shrink_surface_params() will be used to update the surface parameters +for the smaller blit so the blit operation can proceed. + +v2: + * Use double instead of float. (Jason) + +Signed-off-by: Jordan Justen +Reviewed-by: Jason Ekstrand +(cherry picked from commit edf3113aeddcf66cb24906e53a2d4f41616f8985) +--- + src/intel/blorp/blorp_blit.c | 97 ++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 94 insertions(+), 3 deletions(-) + +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index 8db8d5e..7c4158f 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -1433,6 +1433,12 @@ surf_retile_w_to_y(const struct isl_device *isl_dev, + info->tile_y_sa /= 2; + } + ++static bool ++can_shrink_surfaces(const struct blorp_params *params) ++{ ++ return false; ++} ++ + struct blt_axis { + double src0, src1, dst0, dst1; + bool mirror; +@@ -1685,12 +1691,88 @@ adjust_split_source_coords(const struct blt_axis *orig, + split_coords->src1 = orig->src1 + (scale >= 0.0 ? delta1 : delta0); + } + ++static const struct isl_extent2d ++get_px_size_sa(const struct isl_surf *surf) ++{ ++ static const struct isl_extent2d one_to_one = { .w = 1, .h = 1 }; ++ ++ if (surf->msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED) ++ return one_to_one; ++ else ++ return isl_get_interleaved_msaa_px_size_sa(surf->samples); ++} ++ ++static void ++shrink_surface_params(const struct isl_device *dev, ++ struct brw_blorp_surface_info *info, ++ double *x0, double *x1, double *y0, double *y1) ++{ ++ uint32_t byte_offset, x_offset_sa, y_offset_sa, size; ++ struct isl_extent2d px_size_sa; ++ int adjust; ++ ++ surf_convert_to_single_slice(dev, info); ++ ++ px_size_sa = get_px_size_sa(&info->surf); ++ ++ /* Because this gets called after we lower compressed images, the tile ++ * offsets may be non-zero and we need to incorporate them in our ++ * calculations. ++ */ ++ x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa; ++ y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa; ++ isl_tiling_get_intratile_offset_sa(dev, info->surf.tiling, ++ info->surf.format, info->surf.row_pitch, ++ x_offset_sa, y_offset_sa, ++ &byte_offset, ++ &info->tile_x_sa, &info->tile_y_sa); ++ ++ info->addr.offset += byte_offset; ++ ++ adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0; ++ *x0 += adjust; ++ *x1 += adjust; ++ info->tile_x_sa = 0; ++ ++ adjust = (int)info->tile_y_sa / px_size_sa.h - (int)*y0; ++ *y0 += adjust; ++ *y1 += adjust; ++ info->tile_y_sa = 0; ++ ++ size = MIN2((uint32_t)ceil(*x1), info->surf.logical_level0_px.width); ++ info->surf.logical_level0_px.width = size; ++ info->surf.phys_level0_sa.width = size * px_size_sa.w; ++ ++ size = MIN2((uint32_t)ceil(*y1), info->surf.logical_level0_px.height); ++ info->surf.logical_level0_px.height = size; ++ info->surf.phys_level0_sa.height = size * px_size_sa.h; ++} ++ ++static void ++shrink_surfaces(const struct isl_device *dev, ++ struct blorp_params *params, ++ struct brw_blorp_blit_prog_key *wm_prog_key, ++ struct blt_coords *coords) ++{ ++ /* Shrink source surface */ ++ shrink_surface_params(dev, ¶ms->src, &coords->x.src0, &coords->x.src1, ++ &coords->y.src0, &coords->y.src1); ++ wm_prog_key->need_src_offset = false; ++ ++ /* Shrink destination surface */ ++ shrink_surface_params(dev, ¶ms->dst, &coords->x.dst0, &coords->x.dst1, ++ &coords->y.dst0, &coords->y.dst1); ++ wm_prog_key->need_dst_offset = false; ++} ++ + static void + do_blorp_blit(struct blorp_batch *batch, +- struct blorp_params *params, ++ const struct blorp_params *orig_params, + struct brw_blorp_blit_prog_key *wm_prog_key, + const struct blt_coords *orig) + { ++ struct blorp_params params; ++ struct blt_coords blit_coords; + struct blt_coords split_coords = *orig; + double w = orig->x.dst1 - orig->x.dst0; + double h = orig->y.dst1 - orig->y.dst0; +@@ -1702,9 +1784,15 @@ do_blorp_blit(struct blorp_batch *batch, + y_scale = -y_scale; + + bool x_done, y_done; ++ bool shrink = false; + do { ++ params = *orig_params; ++ blit_coords = split_coords; ++ if (shrink) ++ shrink_surfaces(batch->blorp->isl_dev, ¶ms, wm_prog_key, ++ &blit_coords); + enum blit_shrink_status result = +- try_blorp_blit(batch, params, wm_prog_key, &split_coords); ++ try_blorp_blit(batch, ¶ms, wm_prog_key, &blit_coords); + + if (result & BLIT_WIDTH_SHRINK) { + w /= 2.0; +@@ -1719,8 +1807,11 @@ do_blorp_blit(struct blorp_batch *batch, + adjust_split_source_coords(&orig->y, &split_coords.y, y_scale); + } + +- if (result != 0) ++ if (result != 0) { ++ assert(can_shrink_surfaces(orig_params)); ++ shrink = true; + continue; ++ } + + y_done = (orig->y.dst1 - split_coords.y.dst1 < 0.5); + x_done = y_done && (orig->x.dst1 - split_coords.x.dst1 < 0.5); +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch mesa-13.0.3/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch --- mesa-13.0.2/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,140 @@ +From 51e1943f1b4a6a3412aeb5f90ffe24ec17aaa213 Mon Sep 17 00:00:00 2001 +From: Jason Ekstrand +Date: Wed, 26 Oct 2016 01:58:16 -0700 +Subject: [PATCH 4/8] intel/blorp: Always use UINT formats on SKL+ + +Many of these UINT formats aren't available prior to Sky Lake so we used +UNORM formats. Using UINT formats is a bit nicer because it guarantees we +don't run into rounding issues. Also, we will need it in the next commit +for handling copies with CCS enabled. + +Reviewed-by: Topi Pohjolainen +(cherry picked from commit 89f9c46a74ede69148bb5431a0deb8f09f3fa278) +--- + src/intel/blorp/blorp_blit.c | 66 +++++++++++++++++++++++++++++--------------- + 1 file changed, 44 insertions(+), 22 deletions(-) + +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index 7c4158f..f8c5d4c 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -1921,32 +1921,47 @@ blorp_blit(struct blorp_batch *batch, + } + + static enum isl_format +-get_copy_format_for_bpb(unsigned bpb) ++get_copy_format_for_bpb(const struct isl_device *isl_dev, unsigned bpb) + { +- /* The choice of UNORM and UINT formats is very intentional here. Most of +- * the time, we want to use a UINT format to avoid any rounding error in +- * the blit. For stencil blits, R8_UINT is required by the hardware. ++ /* The choice of UNORM and UINT formats is very intentional here. Most ++ * of the time, we want to use a UINT format to avoid any rounding error ++ * in the blit. For stencil blits, R8_UINT is required by the hardware. + * (It's the only format allowed in conjunction with W-tiling.) Also we + * intentionally use the 4-channel formats whenever we can. This is so +- * that, when we do a RGB <-> RGBX copy, the two formats will line up even +- * though one of them is 3/4 the size of the other. The choice of UNORM +- * vs. UINT is also very intentional because Haswell doesn't handle 8 or +- * 16-bit RGB UINT formats at all so we have to use UNORM there. ++ * that, when we do a RGB <-> RGBX copy, the two formats will line up ++ * even though one of them is 3/4 the size of the other. The choice of ++ * UNORM vs. UINT is also very intentional because we don't have 8 or ++ * 16-bit RGB UINT formats until Sky Lake so we have to use UNORM there. + * Fortunately, the only time we should ever use two different formats in + * the table below is for RGB -> RGBA blits and so we will never have any + * UNORM/UINT mismatch. + */ +- switch (bpb) { +- case 8: return ISL_FORMAT_R8_UINT; +- case 16: return ISL_FORMAT_R8G8_UINT; +- case 24: return ISL_FORMAT_R8G8B8_UNORM; +- case 32: return ISL_FORMAT_R8G8B8A8_UNORM; +- case 48: return ISL_FORMAT_R16G16B16_UNORM; +- case 64: return ISL_FORMAT_R16G16B16A16_UNORM; +- case 96: return ISL_FORMAT_R32G32B32_UINT; +- case 128:return ISL_FORMAT_R32G32B32A32_UINT; +- default: +- unreachable("Unknown format bpb"); ++ if (ISL_DEV_GEN(isl_dev) >= 9) { ++ switch (bpb) { ++ case 8: return ISL_FORMAT_R8_UINT; ++ case 16: return ISL_FORMAT_R8G8_UINT; ++ case 24: return ISL_FORMAT_R8G8B8_UINT; ++ case 32: return ISL_FORMAT_R8G8B8A8_UINT; ++ case 48: return ISL_FORMAT_R16G16B16_UINT; ++ case 64: return ISL_FORMAT_R16G16B16A16_UINT; ++ case 96: return ISL_FORMAT_R32G32B32_UINT; ++ case 128:return ISL_FORMAT_R32G32B32A32_UINT; ++ default: ++ unreachable("Unknown format bpb"); ++ } ++ } else { ++ switch (bpb) { ++ case 8: return ISL_FORMAT_R8_UINT; ++ case 16: return ISL_FORMAT_R8G8_UINT; ++ case 24: return ISL_FORMAT_R8G8B8_UNORM; ++ case 32: return ISL_FORMAT_R8G8B8A8_UNORM; ++ case 48: return ISL_FORMAT_R16G16B16_UNORM; ++ case 64: return ISL_FORMAT_R16G16B16A16_UNORM; ++ case 96: return ISL_FORMAT_R32G32B32_UINT; ++ case 128:return ISL_FORMAT_R32G32B32A32_UINT; ++ default: ++ unreachable("Unknown format bpb"); ++ } + } + } + +@@ -2002,7 +2017,7 @@ surf_convert_to_uncompressed(const struct isl_device *isl_dev, + info->tile_y_sa /= fmtl->bh; + + /* It's now an uncompressed surface so we need an uncompressed format */ +- info->surf.format = get_copy_format_for_bpb(fmtl->bpb); ++ info->surf.format = get_copy_format_for_bpb(isl_dev, fmtl->bpb); + } + + static void +@@ -2022,9 +2037,15 @@ surf_fake_rgb_with_red(const struct isl_device *isl_dev, + case ISL_FORMAT_R8G8B8_UNORM: + red_format = ISL_FORMAT_R8_UNORM; + break; ++ case ISL_FORMAT_R8G8B8_UINT: ++ red_format = ISL_FORMAT_R8_UINT; ++ break; + case ISL_FORMAT_R16G16B16_UNORM: + red_format = ISL_FORMAT_R16_UNORM; + break; ++ case ISL_FORMAT_R16G16B16_UINT: ++ red_format = ISL_FORMAT_R16_UINT; ++ break; + case ISL_FORMAT_R32G32B32_UINT: + red_format = ISL_FORMAT_R32_UINT; + break; +@@ -2049,6 +2070,7 @@ blorp_copy(struct blorp_batch *batch, + uint32_t dst_x, uint32_t dst_y, + uint32_t src_width, uint32_t src_height) + { ++ const struct isl_device *isl_dev = batch->blorp->isl_dev; + struct blorp_params params; + + if (src_width == 0 || src_height == 0) +@@ -2068,14 +2090,14 @@ blorp_copy(struct blorp_batch *batch, + const struct isl_format_layout *dst_fmtl = + isl_format_get_layout(params.dst.surf.format); + +- params.src.view.format = get_copy_format_for_bpb(src_fmtl->bpb); ++ params.src.view.format = get_copy_format_for_bpb(isl_dev, src_fmtl->bpb); + if (src_fmtl->bw > 1 || src_fmtl->bh > 1) { + surf_convert_to_uncompressed(batch->blorp->isl_dev, ¶ms.src, + &src_x, &src_y, &src_width, &src_height); + wm_prog_key.need_src_offset = true; + } + +- params.dst.view.format = get_copy_format_for_bpb(dst_fmtl->bpb); ++ params.dst.view.format = get_copy_format_for_bpb(isl_dev, dst_fmtl->bpb); + if (dst_fmtl->bw > 1 || dst_fmtl->bh > 1) { + surf_convert_to_uncompressed(batch->blorp->isl_dev, ¶ms.dst, + &dst_x, &dst_y, NULL, NULL); +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch mesa-13.0.3/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch --- mesa-13.0.2/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,173 @@ +From bf42682940d3dff85acbadebbabe82e3b73bb8d4 Mon Sep 17 00:00:00 2001 +From: Jordan Justen +Date: Wed, 30 Nov 2016 15:53:48 -0800 +Subject: [PATCH 5/8] intel/blorp_blit: Move RGB=>R conversion to follow blit + splitting + +In blorp_copy, when RGB surfaces are copied, we convert the +destination surface to a Red only surface, but 3 times as wide. This +introduces an implicit restriction of "mod 3" for the destination +width. + +It is easier to handle the blorp split buffer offsetting with the +original RGB surface, and do the RGB=>R after this. + +Suggested-by: Jason Ekstrand +Signed-off-by: Jordan Justen +Reviewed-by: Jason Ekstrand +(cherry picked from commit efea8e724458f6a388fb70421db3e655719fffb0) +--- + src/intel/blorp/blorp_blit.c | 113 +++++++++++++++++++++++++------------------ + 1 file changed, 65 insertions(+), 48 deletions(-) + +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index f8c5d4c..662dab0 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -1448,6 +1448,68 @@ struct blt_coords { + struct blt_axis x, y; + }; + ++static void ++surf_fake_rgb_with_red(const struct isl_device *isl_dev, ++ struct brw_blorp_surface_info *info, ++ uint32_t *x, uint32_t *width) ++{ ++ surf_convert_to_single_slice(isl_dev, info); ++ ++ info->surf.logical_level0_px.width *= 3; ++ info->surf.phys_level0_sa.width *= 3; ++ *x *= 3; ++ *width *= 3; ++ ++ enum isl_format red_format; ++ switch (info->view.format) { ++ case ISL_FORMAT_R8G8B8_UNORM: ++ red_format = ISL_FORMAT_R8_UNORM; ++ break; ++ case ISL_FORMAT_R8G8B8_UINT: ++ red_format = ISL_FORMAT_R8_UINT; ++ break; ++ case ISL_FORMAT_R16G16B16_UNORM: ++ red_format = ISL_FORMAT_R16_UNORM; ++ break; ++ case ISL_FORMAT_R16G16B16_UINT: ++ red_format = ISL_FORMAT_R16_UINT; ++ break; ++ case ISL_FORMAT_R32G32B32_UINT: ++ red_format = ISL_FORMAT_R32_UINT; ++ break; ++ default: ++ unreachable("Invalid RGB copy destination format"); ++ } ++ assert(isl_format_get_layout(red_format)->channels.r.type == ++ isl_format_get_layout(info->view.format)->channels.r.type); ++ assert(isl_format_get_layout(red_format)->channels.r.bits == ++ isl_format_get_layout(info->view.format)->channels.r.bits); ++ ++ info->surf.format = info->view.format = red_format; ++} ++ ++static void ++fake_dest_rgb_with_red(const struct isl_device *dev, ++ struct blorp_params *params, ++ struct brw_blorp_blit_prog_key *wm_prog_key, ++ struct blt_coords *coords) ++{ ++ /* Handle RGB destinations for blorp_copy */ ++ const struct isl_format_layout *dst_fmtl = ++ isl_format_get_layout(params->dst.surf.format); ++ ++ if (dst_fmtl->bpb % 3 == 0) { ++ uint32_t dst_x = coords->x.dst0; ++ uint32_t dst_width = coords->x.dst1 - dst_x; ++ surf_fake_rgb_with_red(dev, ¶ms->dst, ++ &dst_x, &dst_width); ++ coords->x.dst0 = dst_x; ++ coords->x.dst1 = dst_x + dst_width; ++ wm_prog_key->dst_rgb = true; ++ wm_prog_key->need_dst_offset = true; ++ } ++} ++ + enum blit_shrink_status { + BLIT_NO_SHRINK = 0, + BLIT_WIDTH_SHRINK = 1, +@@ -1462,10 +1524,12 @@ static enum blit_shrink_status + try_blorp_blit(struct blorp_batch *batch, + struct blorp_params *params, + struct brw_blorp_blit_prog_key *wm_prog_key, +- const struct blt_coords *coords) ++ struct blt_coords *coords) + { + const struct gen_device_info *devinfo = batch->blorp->isl_dev->info; + ++ fake_dest_rgb_with_red(batch->blorp->isl_dev, params, wm_prog_key, coords); ++ + if (isl_format_has_sint_channel(params->src.view.format)) { + wm_prog_key->texture_data_type = nir_type_int; + } else if (isl_format_has_uint_channel(params->src.view.format)) { +@@ -2020,46 +2084,6 @@ surf_convert_to_uncompressed(const struct isl_device *isl_dev, + info->surf.format = get_copy_format_for_bpb(isl_dev, fmtl->bpb); + } + +-static void +-surf_fake_rgb_with_red(const struct isl_device *isl_dev, +- struct brw_blorp_surface_info *info, +- uint32_t *x, uint32_t *width) +-{ +- surf_convert_to_single_slice(isl_dev, info); +- +- info->surf.logical_level0_px.width *= 3; +- info->surf.phys_level0_sa.width *= 3; +- *x *= 3; +- *width *= 3; +- +- enum isl_format red_format; +- switch (info->view.format) { +- case ISL_FORMAT_R8G8B8_UNORM: +- red_format = ISL_FORMAT_R8_UNORM; +- break; +- case ISL_FORMAT_R8G8B8_UINT: +- red_format = ISL_FORMAT_R8_UINT; +- break; +- case ISL_FORMAT_R16G16B16_UNORM: +- red_format = ISL_FORMAT_R16_UNORM; +- break; +- case ISL_FORMAT_R16G16B16_UINT: +- red_format = ISL_FORMAT_R16_UINT; +- break; +- case ISL_FORMAT_R32G32B32_UINT: +- red_format = ISL_FORMAT_R32_UINT; +- break; +- default: +- unreachable("Invalid RGB copy destination format"); +- } +- assert(isl_format_get_layout(red_format)->channels.r.type == +- isl_format_get_layout(info->view.format)->channels.r.type); +- assert(isl_format_get_layout(red_format)->channels.r.bits == +- isl_format_get_layout(info->view.format)->channels.r.bits); +- +- info->surf.format = info->view.format = red_format; +-} +- + void + blorp_copy(struct blorp_batch *batch, + const struct blorp_surf *src_surf, +@@ -2110,13 +2134,6 @@ blorp_copy(struct blorp_batch *batch, + uint32_t dst_width = src_width; + uint32_t dst_height = src_height; + +- if (dst_fmtl->bpb % 3 == 0) { +- surf_fake_rgb_with_red(batch->blorp->isl_dev, ¶ms.dst, +- &dst_x, &dst_width); +- wm_prog_key.dst_rgb = true; +- wm_prog_key.need_dst_offset = true; +- } +- + struct blt_coords coords = { + .x = { + .src0 = src_x, +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch mesa-13.0.3/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch --- mesa-13.0.2/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,87 @@ +From 540db63cab001051a997e69573e6a441a6133763 Mon Sep 17 00:00:00 2001 +From: Jordan Justen +Date: Mon, 7 Nov 2016 14:07:07 -0800 +Subject: [PATCH 6/8] intel/blorp_blit: Enable splitting large blorp blits + +Detect when the surface sizes are too large for a blorp blit. When it +is too large, the blorp blit will be split into a smaller operation +and attempted again. + +For gen7, this fixes the cts test: + +ES3-CTS.gtf.GL3Tests.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singlesampled_blit + +It will also enable us to increase our renderable size from 8k x 8k to +16k x 16k. + +Signed-off-by: Jordan Justen +Reviewed-by: Jason Ekstrand +(cherry picked from commit da381ae6475dfd35f1ab8c6063b4dce368ef7588) +--- + src/intel/blorp/blorp_blit.c | 41 ++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 40 insertions(+), 1 deletion(-) + +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index 662dab0..51241cd 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -1434,9 +1434,41 @@ surf_retile_w_to_y(const struct isl_device *isl_dev, + } + + static bool ++can_shrink_surface(const struct brw_blorp_surface_info *surf) ++{ ++ /* The current code doesn't support offsets into the aux buffers. This ++ * should be possible, but we need to make sure the offset is page ++ * aligned for both the surface and the aux buffer surface. Generally ++ * this mean using the page aligned offset for the aux buffer. ++ * ++ * Currently the cases where we must split the blit are limited to cases ++ * where we don't have a aux buffer. ++ */ ++ if (surf->aux_addr.buffer != NULL) ++ return false; ++ ++ /* We can't support splitting the blit for gen <= 7, because the qpitch ++ * size is calculated by the hardware based on the surface height for ++ * gen <= 7. In gen >= 8, the qpitch is controlled by the driver. ++ */ ++ if (surf->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY) ++ return false; ++ ++ return true; ++} ++ ++static bool + can_shrink_surfaces(const struct blorp_params *params) + { +- return false; ++ return ++ can_shrink_surface(¶ms->src) && ++ can_shrink_surface(¶ms->dst); ++} ++ ++static unsigned ++get_max_surface_size() ++{ ++ return 16384; + } + + struct blt_axis { +@@ -1728,6 +1760,13 @@ try_blorp_blit(struct blorp_batch *batch, + brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key); + + unsigned result = 0; ++ unsigned max_surface_size = get_max_surface_size(devinfo, params); ++ if (params->src.surf.logical_level0_px.width > max_surface_size || ++ params->dst.surf.logical_level0_px.width > max_surface_size) ++ result |= BLIT_WIDTH_SHRINK; ++ if (params->src.surf.logical_level0_px.height > max_surface_size || ++ params->dst.surf.logical_level0_px.height > max_surface_size) ++ result |= BLIT_HEIGHT_SHRINK; + + if (result == 0) { + batch->blorp->exec(batch, params); +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch mesa-13.0.3/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch --- mesa-13.0.2/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,57 @@ +From be1dd376753de6759aeb0638793b1d9bf3d1cad1 Mon Sep 17 00:00:00 2001 +From: Jordan Justen +Date: Tue, 22 Nov 2016 17:20:42 -0800 +Subject: [PATCH 7/8] intel/blorp_blit: Add split_blorp_blit_debug switch + +Enabling this debug switch causes surface shrinking to happen by +default, and lowers the surface size limit which causes blorp blits to +be split. + +Signed-off-by: Jordan Justen +Reviewed-by: Jason Ekstrand +(cherry picked from commit d6526d724765e14fc9bb25cd2a53463a4d1c5fff) +--- + src/intel/blorp/blorp_blit.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index 51241cd..ae03680 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -28,6 +28,8 @@ + + #define FILE_DEBUG_FLAG DEBUG_BLORP + ++static const bool split_blorp_blit_debug = false; ++ + /** + * Enum to specify the order of arguments in a sampler message + */ +@@ -1466,9 +1468,13 @@ can_shrink_surfaces(const struct blorp_params *params) + } + + static unsigned +-get_max_surface_size() ++get_max_surface_size(const struct gen_device_info *devinfo, ++ const struct blorp_params *params) + { +- return 16384; ++ if (split_blorp_blit_debug && can_shrink_surfaces(params)) ++ return 16384 >> 4; /* A smaller restriction when debug is enabled */ ++ else ++ return 16384; + } + + struct blt_axis { +@@ -1887,7 +1893,7 @@ do_blorp_blit(struct blorp_batch *batch, + y_scale = -y_scale; + + bool x_done, y_done; +- bool shrink = false; ++ bool shrink = split_blorp_blit_debug && can_shrink_surfaces(orig_params); + do { + params = *orig_params; + blit_coords = split_coords; +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch mesa-13.0.3/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch --- mesa-13.0.2/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,45 @@ +From 9a19d5a37ae479767d56706275cbe7cc9770d5c7 Mon Sep 17 00:00:00 2001 +From: Jordan Justen +Date: Thu, 3 Nov 2016 12:20:19 -0700 +Subject: [PATCH 8/8] i965: Increase max texture to 16k for gen7+ + +Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98297 +Signed-off-by: Jordan Justen +Reviewed-by: Jason Ekstrand +(cherry picked from commit e9133dd90ec498cfb6a23fa22504e06488352c51) +--- + src/mesa/drivers/dri/i965/brw_context.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c +index d6204fd..0ccc574 100644 +--- a/src/mesa/drivers/dri/i965/brw_context.c ++++ b/src/mesa/drivers/dri/i965/brw_context.c +@@ -523,14 +523,21 @@ brw_initialize_context_constants(struct brw_context *brw) + + ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ + ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS; +- ctx->Const.MaxRenderbufferSize = 8192; +- ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); ++ if (brw->gen >= 7) { ++ ctx->Const.MaxRenderbufferSize = 16384; ++ ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS); ++ ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */ ++ } else { ++ ctx->Const.MaxRenderbufferSize = 8192; ++ ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS); ++ ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ ++ } + ctx->Const.Max3DTextureLevels = 12; /* 2048 */ +- ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */ + ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512; + ctx->Const.MaxTextureMbytes = 1536; + ctx->Const.MaxTextureRectSize = 1 << 12; + ctx->Const.MaxTextureMaxAnisotropy = 16.0; ++ ctx->Const.MaxTextureLodBias = 15.0; + ctx->Const.StripTextureBorder = true; + if (brw->gen >= 7) + ctx->Const.MaxProgramTextureGatherComponents = 4; +-- +2.7.4 + diff -Nru mesa-13.0.2/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff mesa-13.0.3/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff --- mesa-13.0.2/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,387 @@ +commit e933246013eef376804662f3fcf4646c143c6c88 +Author: Heiko Przybyl +Date: Sun Nov 20 14:42:28 2016 +0100 + + r600/sb: Fix loop optimization related hangs on eg + + Make sure unused ops and their references are removed, prior to entering + the GCM (global code motion) pass, to stop GCM from breaking the loop + logic and thus hanging the GPU. + + Turns out, that sb has problems with loops and node optimizations + regarding associative folding: + + - the global code motion (gcm) pass moves ops up a loop level/basic block + until they've fulfilled their total usage count + - if there are ops folded into others, the usage count won't be + fulfilled and thus the op moved way up to the top + - within GCM the op would be visited and their deps would be moved + alongside it, to fulfill the src constaints + - in a loop, an unused op is moved out of the loop and GCM would move + the src value ops up as well + - now here arises the problem: if the loop counter is one of the src + values it would get moved up as well, the loop break condition would + never get hit and the shader turn into an endless loop, resulting in the + GPU hanging and being reset + + A reduced (albeit nonsense) piglit example would be: + + [require] + GLSL >= 1.20 + + [fragment shader] + + uniform int SIZE; + uniform vec4 lights[512]; + + void main() + { + float x = 0; + for(int i = 0; i < SIZE; i++) + x += lights[2*i+1].x; + } + + [test] + uniform int SIZE 1 + draw rect -1 -1 2 2 + + Which gets optimized to: + + ===== SHADER #12 OPT ================================== PS/BARTS/EVERGREEN ===== + ===== 42 dw ===== 1 gprs ===== 2 stack ========================================= + ALU 3 @24 + 1 y: MOV R0.y, 0 + t: MULLO_UINT R0.w, [0x00000002 2.8026e-45].x, R0.z + + LOOP_START_DX10 @22 + PUSH @6 + ALU 1 @30 KC0[CB0:0-15] + 2 M x: PRED_SETGE_INT __.x, R0.z, KC0[0].x + JUMP @14 POP:1 + LOOP_BREAK @20 + POP @14 POP:1 + ALU 2 @32 + 3 x: ADD_INT R0.x, R0.w, [0x00000002 2.8026e-45].x + + TEX 1 @36 + VFETCH R0.x___, R0.x, RID:0 MFC:16 UCF:0 FMT[..] + ALU 1 @40 + 4 y: ADD R0.y, R0.y, R0.x + LOOP_END @4 + EXPORT_DONE PIXEL 0 R0.____ EOP + ===== SHADER_END =============================================================== + + Notice R0.z being the loop counter/break condition relevant register + and being never incremented at all. Also some of the loop content + has been moved out of it, to fulfill the requirements for the one unused + op. + + With a debug build of mesa this would produce an error like + error at : PRED_SETGE_INT __, __, EM.2, R1.x.2||FP@R0.z, C0.x + : operand value R1.x.2||FP@R0.z was not previously written to its gpr + and the compilation would fail due to this. On a release build it gets + passed to the GPU. + + When using this patch, the loop remains intact: + + ===== SHADER #12 OPT ================================== PS/BARTS/EVERGREEN ===== + ===== 48 dw ===== 1 gprs ===== 2 stack ========================================= + ALU 2 @24 + 1 y: MOV R0.y, 0 + z: MOV R0.z, 0 + LOOP_START_DX10 @22 + PUSH @6 + ALU 1 @28 KC0[CB0:0-15] + 2 M x: PRED_SETGE_INT __.x, R0.z, KC0[0].x + JUMP @14 POP:1 + LOOP_BREAK @20 + POP @14 POP:1 + ALU 4 @30 + 3 t: MULLO_UINT T0.x, [0x00000002 2.8026e-45].x, R0.z + + 4 x: ADD_INT R0.x, T0.x, [0x00000002 2.8026e-45].x + + TEX 1 @40 + VFETCH R0.x___, R0.x, RID:0 MFC:16 UCF:0 FMT[..] + ALU 2 @44 + 5 y: ADD R0.y, R0.y, R0.x + z: ADD_INT R0.z, R0.z, 1 + LOOP_END @4 + EXPORT_DONE PIXEL 0 R0.____ EOP + ===== SHADER_END =============================================================== + + Piglit: ./piglit summary console -d results/*_gpu_noglx + name: unpatched_gpu_noglx patched_gpu_noglx + ---- ------------------- ----------------- + pass: 18016 18021 + fail: 748 743 + crash: 7 7 + skip: 1124 1124 + timeout: 0 0 + warn: 13 13 + incomplete: 0 0 + dmesg-warn: 0 0 + dmesg-fail: 0 0 + changes: 0 5 + fixes: 0 5 + regressions: 0 0 + total: 19908 19908 + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94900 + Tested-by: Heiko Przybyl + Tested-on: Barts PRO HD6850 + Signed-off-by: Heiko Przybyl + Signed-off-by: Marek Olšák + +diff --git a/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp b/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp +index 79aef91..abae2bf 100644 +--- a/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp ++++ b/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp +@@ -30,6 +30,18 @@ + + namespace r600_sb { + ++int dce_cleanup::run() { ++ int r; ++ ++ // Run cleanup for as long as there are unused nodes. ++ do { ++ nodes_changed = false; ++ r = vpass::run(); ++ } while (r == 0 && nodes_changed); ++ ++ return r; ++} ++ + bool dce_cleanup::visit(node& n, bool enter) { + if (enter) { + } else { +@@ -110,7 +122,18 @@ bool dce_cleanup::visit(region_node& n, bool enter) { + void dce_cleanup::cleanup_dst(node& n) { + if (!cleanup_dst_vec(n.dst) && remove_unused && + !n.dst.empty() && !(n.flags & NF_DONT_KILL) && n.parent) ++ { ++ // Delete use references to the removed node from the src values. ++ for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) { ++ value* v = *I; ++ if (v && v->def && v->uses.size()) ++ { ++ v->remove_use(&n); ++ } ++ } + n.remove(); ++ nodes_changed = true; ++ } + } + + bool dce_cleanup::visit(container_node& n, bool enter) { +@@ -130,7 +153,7 @@ bool dce_cleanup::cleanup_dst_vec(vvec& vv) { + if (v->gvn_source && v->gvn_source->is_dead()) + v->gvn_source = NULL; + +- if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses)) ++ if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses.size())) + v = NULL; + else + alive = true; +diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp b/src/gallium/drivers/r600/sb/sb_gcm.cpp +index 236b2ea..9c75389 100644 +--- a/src/gallium/drivers/r600/sb/sb_gcm.cpp ++++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp +@@ -199,10 +199,9 @@ void gcm::td_release_val(value *v) { + sblog << "\n"; + ); + +- use_info *u = v->uses; +- while (u) { ++ for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) { ++ use_info *u = *I; + if (u->op->parent != &pending) { +- u = u->next; + continue; + } + +@@ -212,6 +211,7 @@ void gcm::td_release_val(value *v) { + sblog << "\n"; + ); + ++ assert(uses[u->op] > 0); + if (--uses[u->op] == 0) { + GCM_DUMP( + sblog << "td released : "; +@@ -222,7 +222,6 @@ void gcm::td_release_val(value *v) { + pending.remove_node(u->op); + ready.push_back(u->op); + } +- u = u->next; + } + + } +diff --git a/src/gallium/drivers/r600/sb/sb_ir.cpp b/src/gallium/drivers/r600/sb/sb_ir.cpp +index 5226893..d989dce 100644 +--- a/src/gallium/drivers/r600/sb/sb_ir.cpp ++++ b/src/gallium/drivers/r600/sb/sb_ir.cpp +@@ -255,7 +255,7 @@ void container_node::expand() { + void node::remove() {parent->remove_node(this); + } + +-value_hash node::hash_src() { ++value_hash node::hash_src() const { + + value_hash h = 12345; + +@@ -269,7 +269,7 @@ value_hash node::hash_src() { + } + + +-value_hash node::hash() { ++value_hash node::hash() const { + + if (parent && parent->subtype == NST_LOOP_PHI_CONTAINER) + return 47451; +diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h +index 4fc4da2..74c0549 100644 +--- a/src/gallium/drivers/r600/sb/sb_ir.h ++++ b/src/gallium/drivers/r600/sb/sb_ir.h +@@ -446,15 +446,16 @@ enum use_kind { + }; + + struct use_info { +- use_info *next; + node *op; + use_kind kind; + int arg; + +- use_info(node *n, use_kind kind, int arg, use_info* next) +- : next(next), op(n), kind(kind), arg(arg) {} ++ use_info(node *n, use_kind kind, int arg) ++ : op(n), kind(kind), arg(arg) {} + }; + ++typedef std::list< use_info * > uselist; ++ + enum constraint_kind { + CK_SAME_REG, + CK_PACKED_BS, +@@ -498,7 +499,7 @@ public: + value_hash ghash; + + node *def, *adef; +- use_info *uses; ++ uselist uses; + + ra_constraint *constraint; + ra_chunk *chunk; +@@ -585,6 +586,7 @@ public: + } + + void add_use(node *n, use_kind kind, int arg); ++ void remove_use(const node *n); + + value_hash hash(); + value_hash rel_hash(); +@@ -790,8 +792,8 @@ public: + void replace_with(node *n); + void remove(); + +- virtual value_hash hash(); +- value_hash hash_src(); ++ virtual value_hash hash() const; ++ value_hash hash_src() const; + + virtual bool fold_dispatch(expr_handler *ex); + +diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h +index 0346df1..e878f8c 100644 +--- a/src/gallium/drivers/r600/sb/sb_pass.h ++++ b/src/gallium/drivers/r600/sb/sb_pass.h +@@ -124,7 +124,9 @@ class dce_cleanup : public vpass { + public: + + dce_cleanup(shader &s) : vpass(s), +- remove_unused(s.dce_flags & DF_REMOVE_UNUSED) {} ++ remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {} ++ ++ virtual int run(); + + virtual bool visit(node &n, bool enter); + virtual bool visit(alu_group_node &n, bool enter); +@@ -140,6 +142,8 @@ private: + void cleanup_dst(node &n); + bool cleanup_dst_vec(vvec &vv); + ++ // Did we alter/remove nodes during a single pass? ++ bool nodes_changed; + }; + + +diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp +index eb242b1..a8b7b49 100644 +--- a/src/gallium/drivers/r600/sb/sb_valtable.cpp ++++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp +@@ -220,17 +220,33 @@ void value::add_use(node* n, use_kind kind, int arg) { + dump::dump_op(n); + sblog << " kind " << kind << " arg " << arg << "\n"; + } +- uses = new use_info(n, kind, arg, uses); ++ uses.push_back(new use_info(n, kind, arg)); + } + +-unsigned value::use_count() { +- use_info *u = uses; +- unsigned c = 0; +- while (u) { +- ++c; +- u = u->next; ++struct use_node_comp { ++ explicit use_node_comp(const node *n) : n(n) {} ++ bool operator() (const use_info *u) { ++ return u->op->hash() == n->hash(); ++ } ++ ++ private: ++ const node *n; ++}; ++ ++void value::remove_use(const node *n) { ++ uselist::iterator it = ++ std::find_if(uses.begin(), uses.end(), use_node_comp(n)); ++ ++ if (it != uses.end()) ++ { ++ // TODO assert((*it)->kind == kind) ? ++ // TODO assert((*it)->arg == arg) ? ++ uses.erase(it); + } +- return c; ++} ++ ++unsigned value::use_count() { ++ return uses.size(); + } + + bool value::is_global() { +@@ -274,13 +290,7 @@ bool value::is_prealloc() { + } + + void value::delete_uses() { +- use_info *u, *c = uses; +- while (c) { +- u = c->next; +- delete c; +- c = u; +- } +- uses = NULL; ++ uses.erase(uses.begin(), uses.end()); + } + + void ra_constraint::update_values() { +@@ -468,7 +478,7 @@ bool r600_sb::sb_value_set::add_vec(vvec& vv) { + bool r600_sb::sb_value_set::contains(value* v) { + unsigned b = v->uid - 1; + if (b < bs.size()) +- return bs.get(v->uid - 1); ++ return bs.get(b); + else + return false; + } diff -Nru mesa-13.0.2/debian/patches/series mesa-13.0.3/debian/patches/series --- mesa-13.0.2/debian/patches/series 2017-01-14 01:32:35.000000000 +0000 +++ mesa-13.0.3/debian/patches/series 2017-01-14 01:32:36.000000000 +0000 @@ -1,9 +1,18 @@ 07_gallium-fix-build-failure-on-powerpcspe.diff workaround-binutils-mips-844357.diff - - +vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff +r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff # Ubuntu patches. egl-platform-mir.patch i915-dont-default-to-2.1.patch + +0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch +0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch +0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch +0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch +0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch +0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch +0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch +0008-i965-Increase-max-texture-to-16k-for-gen7.patch diff -Nru mesa-13.0.2/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff mesa-13.0.3/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff --- mesa-13.0.2/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1,28 @@ +commit ac57bcda1e0e6dcfa81e24468d5b682686120649 +Author: Christian König +Date: Wed Dec 14 15:03:35 2016 +0100 + + vl/zscan: fix "Fix trivial sign compare warnings" + + The variable actually needs to be signed, otherwise converting it to a + float doesn't work as expected. + + Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=98914 + Signed-off-by: Christian König + Reviewed-by: Nayan Deshmukh + Cc: "13.0" + Fixes: 1fb4179f927 ("vl: Fix trivial sign compare warnings") + +diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c +index ef05af4..24d6452 100644 +--- a/src/gallium/auxiliary/vl/vl_zscan.c ++++ b/src/gallium/auxiliary/vl/vl_zscan.c +@@ -152,7 +152,7 @@ create_vert_shader(struct vl_zscan *zscan) + for (i = 0; i < zscan->num_channels; ++i) { + ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), + ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH) +- * (i - (signed)zscan->num_channels / 2))); ++ * ((signed)i - (signed)zscan->num_channels / 2))); + + ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, + ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); diff -Nru mesa-13.0.2/debian/rules mesa-13.0.3/debian/rules --- mesa-13.0.2/debian/rules 2017-01-14 01:32:35.000000000 +0000 +++ mesa-13.0.3/debian/rules 2017-01-14 01:32:36.000000000 +0000 @@ -8,18 +8,11 @@ DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) DEB_HOST_ARCH ?= $(shell dpkg-architecture -qDEB_HOST_ARCH) DEB_HOST_ARCH_OS ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_OS) -DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) -DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) DEB_HOST_ARCH_CPU ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_CPU) -ifeq ($(DEB_BUILD_GNU_TYPE), $(DEB_HOST_GNU_TYPE)) - confflags += --build=$(DEB_HOST_GNU_TYPE) -else - confflags += --build=$(DEB_BUILD_GNU_TYPE) --host=$(DEB_HOST_GNU_TYPE) -endif ifeq (,$(filter $(DEB_HOST_ARCH), armhf)) buildflags = \ - $(shell DEB_CFLAGS_MAINT_APPEND="-Wall -O2" DEB_CXXFLAGS_MAINT_APPEND="-Wall -O2" dpkg-buildflags --export=configure) + $(shell DEB_CFLAGS_MAINT_APPEND=-Wall DEB_CXXFLAGS_MAINT_APPEND=-Wall dpkg-buildflags --export=configure) else # Workaround for a variant of LP: #725126 buildflags = \ @@ -114,6 +107,12 @@ confflags_GALLIUM += --enable-lmsensors endif +# Disable assembly usage on x32 otherwise Mesa defaults to x86_64 assembly +# which doesn't work on x32 (see #758094) +ifneq (,$(filter $(DEB_HOST_ARCH), x32)) + confflags += --disable-asm +endif + confflags_EGL = --with-egl-platforms="$(EGL_DISPLAYS)" confflags_GLES = --enable-gles1 --enable-gles2 confflags_GALLIUM += --with-gallium-drivers="$(GALLIUM_DRIVERS)" @@ -153,7 +152,6 @@ override_dh_auto_configure: dh_auto_configure -- \ - --disable-silent-rules \ $(confflags) override_dh_auto_install: diff -Nru mesa-13.0.2/debian/source/format mesa-13.0.3/debian/source/format --- mesa-13.0.2/debian/source/format 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/debian/source/format 2017-01-14 01:32:36.000000000 +0000 @@ -0,0 +1 @@ +1.0 diff -Nru mesa-13.0.2/docs/relnotes/13.0.2.html mesa-13.0.3/docs/relnotes/13.0.2.html --- mesa-13.0.2/docs/relnotes/13.0.2.html 2016-11-28 15:05:25.000000000 +0000 +++ mesa-13.0.3/docs/relnotes/13.0.2.html 2016-12-12 23:51:29.000000000 +0000 @@ -31,7 +31,8 @@

SHA256 checksums

-TBD
+6014233a5db6032ab8de4881384871bbe029de684502707794ce7b3e6beec308  mesa-13.0.2.tar.gz
+a6ed622645f4ed61da418bf65adde5bcc4bb79023c36ba7d6b45b389da4416d5  mesa-13.0.2.tar.xz
 
diff -Nru mesa-13.0.2/docs/relnotes/13.0.3.html mesa-13.0.3/docs/relnotes/13.0.3.html --- mesa-13.0.2/docs/relnotes/13.0.3.html 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/docs/relnotes/13.0.3.html 2017-01-05 15:50:24.000000000 +0000 @@ -0,0 +1,176 @@ + + + + + Mesa Release Notes + + + + +
+

The Mesa 3D Graphics Library

+
+ + +
+ +

Mesa 13.0.3 Release Notes / January 5, 2017

+ +

+Mesa 13.0.3 is a bug fix release which fixes bugs found since the 13.0.2 release. +

+

+Mesa 13.0.3 implements the OpenGL 4.4 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.4. OpenGL +4.4 is only available if requested at context creation +because compatibility contexts are not supported. +

+ + +

SHA256 checksums

+
+TBD
+
+ + +

New features

+

None

+ + +

Bug fixes

+ +
    + +
  • Bug 77662 - Fail to render to different faces of depth-stencil cube map
  • + +
  • Bug 92234 - [BDW] GPU hang in Shogun2
  • + +
  • Bug 98329 - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer
  • + +
  • Bug 99038 - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes
  • + +
+ + +

Changes

+ +

Chad Versace (2):

+
    +
  • i965/mt: Disable aux surfaces after making miptree shareable
  • +
  • egl: Fix crashes in eglCreate*Surface()
  • +
+ +

Dave Airlie (4):

+
    +
  • anv: set maxFragmentDualSrcAttachments to 1
  • +
  • radv: set maxFragmentDualSrcAttachments to 1
  • +
  • radv: fix another regression since shadow fixes.
  • +
  • radv: add missing license file to radv_meta_bufimage.
  • +
+ +

Emil Velikov (5):

+
    +
  • docs: add sha256 checksums for 13.0.2
  • +
  • anv: don't double-close the same fd
  • +
  • anv: don't leak memory if anv_init_wsi() fails
  • +
  • radv: don't leak the fd if radv_physical_device_init() succeeds
  • +
  • Update version to 13.0.3
  • +
+ +

Eric Anholt (1):

+
    +
  • vc4: In a loop break/continue, jump if everyone has taken the path.
  • +
+ +

Gwan-gyeong Mun (3):

+
    +
  • anv: Add missing error-checking to anv_block_pool_init (v2)
  • +
  • anv: Update the teardown in reverse order of the anv_CreateDevice
  • +
  • vulkan/wsi: Fix resource leak in success path of wsi_queue_init()
  • +
+ +

Haixia Shi (1):

+
    +
  • compiler/glsl: fix precision problem of tanh
  • +
+ +

Ilia Mirkin (1):

+
    +
  • mesa: only verify that enabled arrays have backing buffers
  • +
+ +

Jason Ekstrand (8):

+
    +
  • anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push constants are dirty
  • +
  • anv/image: Rename hiz_surface to aux_surface
  • +
  • anv/cmd_buffer: Remove the 1-D case from the HiZ QPitch calculation
  • +
  • genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode
  • +
  • anv/device: Return the right error for failed maps
  • +
  • anv/device: Implicitly unmap memory objects in FreeMemory
  • +
  • anv/descriptor_set: Write the state offset in the surface state free list.
  • +
  • spirv: Use a simpler and more correct implementaiton of tanh()
  • +
+ +

Kenneth Graunke (1):

+
    +
  • i965: Allocate at least some URB space even when max_vertices = 0.
  • +
+ +

Marek Olšák (17):

+
    +
  • radeonsi: always set all blend registers
  • +
  • radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending
  • +
  • radeonsi: disable RB+ blend optimizations for dual source blending
  • +
  • radeonsi: consolidate max-work-group-size computation
  • +
  • radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips
  • +
  • radeonsi: apply a TC L1 write corruption workaround for SI
  • +
  • radeonsi: apply a tessellation bug workaround for SI
  • +
  • radeonsi: add a tess+GS hang workaround for VI dGPUs
  • +
  • radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well
  • +
  • cso: don't release sampler states that are bound
  • +
  • radeonsi: always restore sampler states when unbinding sampler views
  • +
  • radeonsi: fix incorrect FMASK checking in bind_sampler_states
  • +
  • radeonsi: allow specifying simm16 of emit_waitcnt at call sites
  • +
  • radeonsi: wait for outstanding memory instructions in TCS barriers
  • +
  • tgsi: fix the src type of TGSI_OPCODE_MEMBAR
  • +
  • radeonsi: wait for outstanding LDS instructions in memory barriers if needed
  • +
  • radeonsi: disable the constant engine (CE) on Carrizo and Stoney
  • +
+ +

Matt Turner (3):

+
    +
  • i965/fs: Rename opt_copy_propagate -> opt_copy_propagation.
  • +
  • i965/fs: Add unit tests for copy propagation pass.
  • +
  • i965/fs: Reject copy propagation into SEL if not min/max.
  • +
+ +

Nanley Chery (1):

+
    +
  • mesa/fbobject: Update CubeMapFace when reusing textures
  • +
+ +

Nicolai Hähnle (4):

+
    +
  • radeonsi: fix isolines tess factor writes to control ring
  • +
  • radeonsi: update all GSVS ring descriptors for new buffer allocations
  • +
  • radeonsi: do not kill GS with memory writes
  • +
  • radeonsi: fix an off-by-one error in the bounds check for max_vertices
  • +
+ +

Rhys Kidd (1):

+
    +
  • glsl: Add pthread libs to cache_test
  • +
+ +

Timothy Arceri (2):

+
    +
  • mesa: fix active subroutine uniforms properly
  • +
  • Revert "nir: Turn imov/fmov of undef into undef."
  • +
+ + +
+ + diff -Nru mesa-13.0.2/src/amd/common/ac_nir_to_llvm.c mesa-13.0.3/src/amd/common/ac_nir_to_llvm.c --- mesa-13.0.2/src/amd/common/ac_nir_to_llvm.c 2016-11-28 12:56:18.000000000 +0000 +++ mesa-13.0.3/src/amd/common/ac_nir_to_llvm.c 2016-12-14 19:03:11.000000000 +0000 @@ -3545,7 +3545,7 @@ if (instr->op == nir_texop_query_levels) result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), ""); - else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod) + else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4) result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && diff -Nru mesa-13.0.2/src/amd/vulkan/radv_device.c mesa-13.0.3/src/amd/vulkan/radv_device.c --- mesa-13.0.2/src/amd/vulkan/radv_device.c 2016-11-28 12:56:17.000000000 +0000 +++ mesa-13.0.3/src/amd/vulkan/radv_device.c 2016-12-14 19:03:10.000000000 +0000 @@ -91,6 +91,7 @@ fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); device->name = device->rad_info.name; + close(fd); return VK_SUCCESS; fail: @@ -424,7 +425,7 @@ .maxGeometryTotalOutputComponents = 1024, .maxFragmentInputComponents = 128, .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = 2, + .maxFragmentDualSrcAttachments = 1, .maxFragmentCombinedOutputResources = 8, .maxComputeSharedMemorySize = 32768, .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, diff -Nru mesa-13.0.2/src/amd/vulkan/radv_meta_bufimage.c mesa-13.0.3/src/amd/vulkan/radv_meta_bufimage.c --- mesa-13.0.2/src/amd/vulkan/radv_meta_bufimage.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/amd/vulkan/radv_meta_bufimage.c 2016-12-16 14:03:24.000000000 +0000 @@ -1,6 +1,33 @@ +/* + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ #include "radv_meta.h" #include "nir/nir_builder.h" +/* + * Compute shader implementation of image->buffer copy. + */ + static nir_shader * build_nir_itob_compute_shader(struct radv_device *dev) { diff -Nru mesa-13.0.2/src/amd/vulkan/radv_timestamp.h mesa-13.0.3/src/amd/vulkan/radv_timestamp.h --- mesa-13.0.2/src/amd/vulkan/radv_timestamp.h 2016-11-28 15:16:44.000000000 +0000 +++ mesa-13.0.3/src/amd/vulkan/radv_timestamp.h 2017-01-05 15:53:05.000000000 +0000 @@ -1 +1 @@ -#define RADV_TIMESTAMP "1480346204" +#define RADV_TIMESTAMP "1483631585" diff -Nru mesa-13.0.2/src/compiler/glsl/builtin_functions.cpp mesa-13.0.3/src/compiler/glsl/builtin_functions.cpp --- mesa-13.0.2/src/compiler/glsl/builtin_functions.cpp 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/compiler/glsl/builtin_functions.cpp 2016-12-16 14:03:24.000000000 +0000 @@ -3563,9 +3563,17 @@ ir_variable *x = in_var(type, "x"); MAKE_SIG(type, v130, 1, x); + /* Clamp x to [-10, +10] to avoid precision problems. + * When x > 10, e^(-x) is so small relative to e^x that it gets flushed to + * zero in the computation e^x + e^(-x). The same happens in the other + * direction when x < -10. + */ + ir_variable *t = body.make_temp(type, "tmp"); + body.emit(assign(t, min2(max2(x, imm(-10.0f)), imm(10.0f)))); + /* (e^x - e^(-x)) / (e^x + e^(-x)) */ - body.emit(ret(div(sub(exp(x), exp(neg(x))), - add(exp(x), exp(neg(x)))))); + body.emit(ret(div(sub(exp(t), exp(neg(t))), + add(exp(t), exp(neg(t)))))); return sig; } diff -Nru mesa-13.0.2/src/compiler/glsl/linker.cpp mesa-13.0.3/src/compiler/glsl/linker.cpp --- mesa-13.0.2/src/compiler/glsl/linker.cpp 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/compiler/glsl/linker.cpp 2016-12-14 19:03:10.000000000 +0000 @@ -3118,7 +3118,6 @@ if (!uni) continue; - sh->NumSubroutineUniforms++; count = 0; if (sh->NumSubroutineFunctions == 0) { linker_error(prog, "subroutine uniform %s defined but no valid functions found\n", uni->type->name); diff -Nru mesa-13.0.2/src/compiler/glsl/link_uniforms.cpp mesa-13.0.3/src/compiler/glsl/link_uniforms.cpp --- mesa-13.0.2/src/compiler/glsl/link_uniforms.cpp 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/compiler/glsl/link_uniforms.cpp 2016-12-14 19:03:10.000000000 +0000 @@ -633,6 +633,8 @@ uniform->opaque[shader_type].index = this->next_subroutine; uniform->opaque[shader_type].active = true; + prog->_LinkedShaders[shader_type]->NumSubroutineUniforms++; + /* Increment the subroutine index by 1 for non-arrays and by the * number of array elements for arrays. */ diff -Nru mesa-13.0.2/src/compiler/glsl/tests/.deps/cache_test.Po mesa-13.0.3/src/compiler/glsl/tests/.deps/cache_test.Po --- mesa-13.0.2/src/compiler/glsl/tests/.deps/cache_test.Po 2016-11-28 15:16:14.000000000 +0000 +++ mesa-13.0.3/src/compiler/glsl/tests/.deps/cache_test.Po 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -# dummy diff -Nru mesa-13.0.2/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po mesa-13.0.3/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po --- mesa-13.0.2/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po 2017-01-05 15:52:34.000000000 +0000 @@ -0,0 +1 @@ +# dummy diff -Nru mesa-13.0.2/src/compiler/Makefile.glsl.am mesa-13.0.3/src/compiler/Makefile.glsl.am --- mesa-13.0.2/src/compiler/Makefile.glsl.am 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/compiler/Makefile.glsl.am 2016-12-24 13:03:53.000000000 +0000 @@ -62,8 +62,11 @@ glsl_tests_cache_test_SOURCES = \ glsl/tests/cache_test.c +glsl_tests_cache_test_CFLAGS = \ + $(PTHREAD_CFLAGS) glsl_tests_cache_test_LDADD = \ - glsl/libglsl.la + glsl/libglsl.la \ + $(PTHREAD_LIBS) glsl_tests_general_ir_test_SOURCES = \ glsl/tests/builtin_variable_test.cpp \ diff -Nru mesa-13.0.2/src/compiler/Makefile.in mesa-13.0.3/src/compiler/Makefile.in --- mesa-13.0.2/src/compiler/Makefile.in 2016-11-28 15:16:02.000000000 +0000 +++ mesa-13.0.3/src/compiler/Makefile.in 2017-01-05 15:52:22.000000000 +0000 @@ -401,9 +401,15 @@ am_glsl_tests_blob_test_OBJECTS = glsl/tests/blob_test.$(OBJEXT) glsl_tests_blob_test_OBJECTS = $(am_glsl_tests_blob_test_OBJECTS) glsl_tests_blob_test_DEPENDENCIES = glsl/libglsl.la -am_glsl_tests_cache_test_OBJECTS = glsl/tests/cache_test.$(OBJEXT) +am_glsl_tests_cache_test_OBJECTS = \ + glsl/tests/glsl_tests_cache_test-cache_test.$(OBJEXT) glsl_tests_cache_test_OBJECTS = $(am_glsl_tests_cache_test_OBJECTS) -glsl_tests_cache_test_DEPENDENCIES = glsl/libglsl.la +glsl_tests_cache_test_DEPENDENCIES = glsl/libglsl.la \ + $(am__DEPENDENCIES_1) +glsl_tests_cache_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ am_glsl_tests_general_ir_test_OBJECTS = \ glsl/tests/builtin_variable_test.$(OBJEXT) \ glsl/tests/invalidate_locations_test.$(OBJEXT) \ @@ -1389,8 +1395,12 @@ glsl_tests_cache_test_SOURCES = \ glsl/tests/cache_test.c +glsl_tests_cache_test_CFLAGS = \ + $(PTHREAD_CFLAGS) + glsl_tests_cache_test_LDADD = \ - glsl/libglsl.la + glsl/libglsl.la \ + $(PTHREAD_LIBS) glsl_tests_general_ir_test_SOURCES = \ glsl/tests/builtin_variable_test.cpp \ @@ -2024,12 +2034,13 @@ glsl/tests/blob-test$(EXEEXT): $(glsl_tests_blob_test_OBJECTS) $(glsl_tests_blob_test_DEPENDENCIES) $(EXTRA_glsl_tests_blob_test_DEPENDENCIES) glsl/tests/$(am__dirstamp) @rm -f glsl/tests/blob-test$(EXEEXT) $(AM_V_CCLD)$(LINK) $(glsl_tests_blob_test_OBJECTS) $(glsl_tests_blob_test_LDADD) $(LIBS) -glsl/tests/cache_test.$(OBJEXT): glsl/tests/$(am__dirstamp) \ +glsl/tests/glsl_tests_cache_test-cache_test.$(OBJEXT): \ + glsl/tests/$(am__dirstamp) \ glsl/tests/$(DEPDIR)/$(am__dirstamp) glsl/tests/cache-test$(EXEEXT): $(glsl_tests_cache_test_OBJECTS) $(glsl_tests_cache_test_DEPENDENCIES) $(EXTRA_glsl_tests_cache_test_DEPENDENCIES) glsl/tests/$(am__dirstamp) @rm -f glsl/tests/cache-test$(EXEEXT) - $(AM_V_CCLD)$(LINK) $(glsl_tests_cache_test_OBJECTS) $(glsl_tests_cache_test_LDADD) $(LIBS) + $(AM_V_CCLD)$(glsl_tests_cache_test_LINK) $(glsl_tests_cache_test_OBJECTS) $(glsl_tests_cache_test_LDADD) $(LIBS) glsl/tests/builtin_variable_test.$(OBJEXT): \ glsl/tests/$(am__dirstamp) \ glsl/tests/$(DEPDIR)/$(am__dirstamp) @@ -2220,9 +2231,9 @@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/glcpp/$(DEPDIR)/pp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/blob_test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/builtin_variable_test.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/cache_test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/copy_constant_to_storage_tests.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/general_ir_test.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/invalidate_locations_test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/sampler_types_test.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/set_uniform_initializer_tests.Po@am__quote@ @@ -2331,6 +2342,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< +glsl/tests/glsl_tests_cache_test-cache_test.o: glsl/tests/cache_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -MT glsl/tests/glsl_tests_cache_test-cache_test.o -MD -MP -MF glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo -c -o glsl/tests/glsl_tests_cache_test-cache_test.o `test -f 'glsl/tests/cache_test.c' || echo '$(srcdir)/'`glsl/tests/cache_test.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='glsl/tests/cache_test.c' object='glsl/tests/glsl_tests_cache_test-cache_test.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -c -o glsl/tests/glsl_tests_cache_test-cache_test.o `test -f 'glsl/tests/cache_test.c' || echo '$(srcdir)/'`glsl/tests/cache_test.c + +glsl/tests/glsl_tests_cache_test-cache_test.obj: glsl/tests/cache_test.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -MT glsl/tests/glsl_tests_cache_test-cache_test.obj -MD -MP -MF glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo -c -o glsl/tests/glsl_tests_cache_test-cache_test.obj `if test -f 'glsl/tests/cache_test.c'; then $(CYGPATH_W) 'glsl/tests/cache_test.c'; else $(CYGPATH_W) '$(srcdir)/glsl/tests/cache_test.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='glsl/tests/cache_test.c' object='glsl/tests/glsl_tests_cache_test-cache_test.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -c -o glsl/tests/glsl_tests_cache_test-cache_test.obj `if test -f 'glsl/tests/cache_test.c'; then $(CYGPATH_W) 'glsl/tests/cache_test.c'; else $(CYGPATH_W) '$(srcdir)/glsl/tests/cache_test.c'; fi` + spirv/spirv2nir-spirv2nir.o: spirv/spirv2nir.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(spirv2nir_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spirv/spirv2nir-spirv2nir.o -MD -MP -MF spirv/$(DEPDIR)/spirv2nir-spirv2nir.Tpo -c -o spirv/spirv2nir-spirv2nir.o `test -f 'spirv/spirv2nir.c' || echo '$(srcdir)/'`spirv/spirv2nir.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) spirv/$(DEPDIR)/spirv2nir-spirv2nir.Tpo spirv/$(DEPDIR)/spirv2nir-spirv2nir.Po diff -Nru mesa-13.0.2/src/compiler/nir/nir_opt_undef.c mesa-13.0.3/src/compiler/nir/nir_opt_undef.c --- mesa-13.0.2/src/compiler/nir/nir_opt_undef.c 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/compiler/nir/nir_opt_undef.c 2016-12-16 14:03:24.000000000 +0000 @@ -79,9 +79,7 @@ { if (alu->op != nir_op_vec2 && alu->op != nir_op_vec3 && - alu->op != nir_op_vec4 && - alu->op != nir_op_fmov && - alu->op != nir_op_imov) + alu->op != nir_op_vec4) return false; assert(alu->dest.dest.is_ssa); diff -Nru mesa-13.0.2/src/compiler/spirv/vtn_glsl450.c mesa-13.0.3/src/compiler/spirv/vtn_glsl450.c --- mesa-13.0.2/src/compiler/spirv/vtn_glsl450.c 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/compiler/spirv/vtn_glsl450.c 2016-12-16 14:03:24.000000000 +0000 @@ -565,16 +565,21 @@ build_exp(nb, nir_fneg(nb, src[0])))); return; - case GLSLstd450Tanh: - /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */ - val->ssa->def = - nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fsub(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0])))), - nir_fmul(nb, nir_imm_float(nb, 0.5f), - nir_fadd(nb, build_exp(nb, src[0]), - build_exp(nb, nir_fneg(nb, src[0]))))); + case GLSLstd450Tanh: { + /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) + * + * With a little algebra this reduces to (e^2x - 1) / (e^2x + 1) + * + * We clamp x to (-inf, +10] to avoid precision problems. When x > 10, + * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the + * computation e^2x +/- 1 so it can be ignored. + */ + nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10)); + nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2))); + val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)), + nir_fadd(nb, exp2x, nir_imm_float(nb, 1))); return; + } case GLSLstd450Asinh: val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]), diff -Nru mesa-13.0.2/src/egl/main/eglapi.c mesa-13.0.3/src/egl/main/eglapi.c --- mesa-13.0.2/src/egl/main/eglapi.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/egl/main/eglapi.c 2016-12-16 14:03:24.000000000 +0000 @@ -849,7 +849,7 @@ RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE); #ifdef HAVE_SURFACELESS_PLATFORM - if (disp->Platform == _EGL_PLATFORM_SURFACELESS) { + if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) { /* From the EGL_MESA_platform_surfaceless spec (v1): * * eglCreatePlatformWindowSurface fails when called with a @@ -970,7 +970,7 @@ EGLSurface ret; #if HAVE_SURFACELESS_PLATFORM - if (disp->Platform == _EGL_PLATFORM_SURFACELESS) { + if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) { /* From the EGL_MESA_platform_surfaceless spec (v1): * * [Like eglCreatePlatformWindowSurface,] eglCreatePlatformPixmapSurface diff -Nru mesa-13.0.2/src/gallium/auxiliary/cso_cache/cso_cache.c mesa-13.0.3/src/gallium/auxiliary/cso_cache/cso_cache.c --- mesa-13.0.2/src/gallium/auxiliary/cso_cache/cso_cache.c 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/gallium/auxiliary/cso_cache/cso_cache.c 2016-12-14 19:03:12.000000000 +0000 @@ -188,7 +188,9 @@ void *state) { struct cso_hash *hash = _cso_hash_for_type(sc, type); - sanitize_hash(sc, hash, type, sc->max_size); + + if (type != CSO_SAMPLER) + sanitize_hash(sc, hash, type, sc->max_size); return cso_hash_insert(hash, hash_key, state); } diff -Nru mesa-13.0.2/src/gallium/auxiliary/tgsi/tgsi_info.c mesa-13.0.3/src/gallium/auxiliary/tgsi/tgsi_info.c --- mesa-13.0.2/src/gallium/auxiliary/tgsi/tgsi_info.c 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/gallium/auxiliary/tgsi/tgsi_info.c 2016-12-14 19:03:12.000000000 +0000 @@ -485,6 +485,7 @@ case TGSI_OPCODE_UMUL_HI: case TGSI_OPCODE_UP2H: case TGSI_OPCODE_U2I64: + case TGSI_OPCODE_MEMBAR: return TGSI_TYPE_UNSIGNED; case TGSI_OPCODE_IMUL_HI: case TGSI_OPCODE_I2F: diff -Nru mesa-13.0.2/src/gallium/drivers/radeon/r600_pipe_common.c mesa-13.0.3/src/gallium/drivers/radeon/r600_pipe_common.c --- mesa-13.0.2/src/gallium/drivers/radeon/r600_pipe_common.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeon/r600_pipe_common.c 2016-12-14 19:03:11.000000000 +0000 @@ -85,7 +85,8 @@ { struct radeon_winsys_cs *cs = ctx->gfx.cs; - if (ctx->chip_class == CIK) { + if (ctx->chip_class == CIK || + ctx->chip_class == VI) { /* Two EOP events are required to make all engines go idle * (and optional cache flushes executed) before the timestamp * is written. @@ -114,7 +115,8 @@ { unsigned dwords = 6; - if (screen->chip_class == CIK) + if (screen->chip_class == CIK || + screen->chip_class == VI) dwords *= 2; if (!screen->info.has_virtual_memory) diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_compute.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_compute.c --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_compute.c 2016-11-24 16:34:41.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_compute.c 2016-12-14 19:03:11.000000000 +0000 @@ -343,6 +343,7 @@ lds_blocks += align(program->local_size, 512) >> 9; } + /* TODO: use si_multiwave_lds_size_workaround */ assert(lds_blocks <= 0xFF); config->rsrc2 &= C_00B84C_LDS_SIZE; diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_descriptors.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_descriptors.c --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_descriptors.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_descriptors.c 2016-12-14 19:03:12.000000000 +0000 @@ -413,13 +413,13 @@ struct si_sampler_views *views = &sctx->samplers[shader].views; struct si_sampler_view *rview = (struct si_sampler_view*)view; struct si_descriptors *descs = si_sampler_descriptors(sctx, shader); + uint32_t *desc = descs->list + slot * 16; if (views->views[slot] == view && !disallow_early_out) return; if (view) { struct r600_texture *rtex = (struct r600_texture *)view->texture; - uint32_t *desc = descs->list + slot * 16; assert(rtex); /* views with texture == NULL aren't supported */ pipe_sampler_view_reference(&views->views[slot], view); @@ -468,9 +468,14 @@ rview->is_stencil_sampler, true); } else { pipe_sampler_view_reference(&views->views[slot], NULL); - memcpy(descs->list + slot*16, null_texture_descriptor, 8*4); + memcpy(desc, null_texture_descriptor, 8*4); /* Only clear the lower dwords of FMASK. */ - memcpy(descs->list + slot*16 + 8, null_texture_descriptor, 4*4); + memcpy(desc + 8, null_texture_descriptor, 4*4); + /* Re-set the sampler state if we are transitioning from FMASK. */ + if (views->sampler_states[slot]) + memcpy(desc + 12, + views->sampler_states[slot], 4*4); + views->enabled_mask &= ~(1u << slot); } @@ -803,10 +808,10 @@ /* If FMASK is bound, don't overwrite it. * The sampler state will be set after FMASK is unbound. */ - if (samplers->views.views[i] && - samplers->views.views[i]->texture && - samplers->views.views[i]->texture->target != PIPE_BUFFER && - ((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size) + if (samplers->views.views[slot] && + samplers->views.views[slot]->texture && + samplers->views.views[slot]->texture->target != PIPE_BUFFER && + ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size) continue; memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4); diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_pipe.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_pipe.c --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_pipe.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_pipe.c 2016-12-14 19:03:12.000000000 +0000 @@ -187,7 +187,10 @@ /* SI + AMDGPU + CE = GPU hang */ if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib && - sscreen->b.chip_class != SI) { + sscreen->b.chip_class != SI && + /* These can't use CE due to a power gating bug in the kernel. */ + sscreen->b.family != CHIP_CARRIZO && + sscreen->b.family != CHIP_STONEY) { sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs); if (!sctx->ce_ib) goto fail; diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.c --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.c 2016-12-16 14:03:24.000000000 +0000 @@ -2577,10 +2577,18 @@ lp_build_const_int32(gallivm, tess_outer_index * 4), ""); - for (i = 0; i < outer_comps; i++) - out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer); - for (i = 0; i < inner_comps; i++) - out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner); + if (shader->key.tcs.epilog.prim_mode == PIPE_PRIM_LINES) { + /* For isolines, the hardware expects tess factors in the + * reverse order from what GLSL / TGSI specify. + */ + out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer); + out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer); + } else { + for (i = 0; i < outer_comps; i++) + out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer); + for (i = 0; i < inner_comps; i++) + out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner); + } /* Convert the outputs to vectors for stores. */ vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4)); @@ -3301,6 +3309,7 @@ * point in the program by emitting empty inline assembly that is marked as * having side effects. */ +#if 0 /* unused currently */ static void emit_optimization_barrier(struct si_shader_context *ctx) { LLVMBuilderRef builder = ctx->gallivm.builder; @@ -3308,13 +3317,19 @@ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false); LLVMBuildCall(builder, inlineasm, NULL, 0, ""); } +#endif -static void emit_waitcnt(struct si_shader_context *ctx) +/* Combine these with & instead of |. */ +#define NOOP_WAITCNT 0xf7f +#define LGKM_CNT 0x07f +#define VM_CNT 0xf70 + +static void emit_waitcnt(struct si_shader_context *ctx, unsigned simm16) { struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; LLVMValueRef args[1] = { - lp_build_const_int32(gallivm, 0xf70) + lp_build_const_int32(gallivm, simm16) }; lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0); @@ -3326,8 +3341,23 @@ struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); + LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0); + unsigned flags = LLVMConstIntGetZExtValue(src0); + unsigned waitcnt = NOOP_WAITCNT; + + if (flags & TGSI_MEMBAR_THREAD_GROUP) + waitcnt &= VM_CNT & LGKM_CNT; + + if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER | + TGSI_MEMBAR_SHADER_BUFFER | + TGSI_MEMBAR_SHADER_IMAGE)) + waitcnt &= VM_CNT; + + if (flags & TGSI_MEMBAR_SHARED) + waitcnt &= LGKM_CNT; - emit_waitcnt(ctx); + if (waitcnt != NOOP_WAITCNT) + emit_waitcnt(ctx, waitcnt); } static LLVMValueRef @@ -3481,7 +3511,8 @@ struct si_shader_context *ctx, struct lp_build_emit_data * emit_data, unsigned target, - bool atomic) + bool atomic, + bool force_glc) { const struct tgsi_full_instruction *inst = emit_data->inst; LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); @@ -3489,6 +3520,7 @@ LLVMValueRef r128 = i1false; LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false; LLVMValueRef glc = + force_glc || inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ? i1true : i1false; LLVMValueRef slc = i1false; @@ -3543,7 +3575,8 @@ LLVMValueRef rsrc, LLVMValueRef index, LLVMValueRef offset, - bool atomic) + bool atomic, + bool force_glc) { const struct tgsi_full_instruction *inst = emit_data->inst; LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); @@ -3554,6 +3587,7 @@ emit_data->args[emit_data->arg_count++] = offset; /* voffset */ if (!atomic) { emit_data->args[emit_data->arg_count++] = + force_glc || inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ? i1true : i1false; /* glc */ } @@ -3583,7 +3617,7 @@ offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, ""); buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero, - offset, false); + offset, false, false); } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) { LLVMValueRef coords; @@ -3593,14 +3627,14 @@ if (target == TGSI_TEXTURE_BUFFER) { rsrc = extract_rsrc_top_half(ctx, rsrc); buffer_append_args(ctx, emit_data, rsrc, coords, - bld_base->uint_bld.zero, false); + bld_base->uint_bld.zero, false, false); } else { emit_data->args[0] = coords; emit_data->args[1] = rsrc; emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */ emit_data->arg_count = 3; - image_append_args(ctx, emit_data, target, false); + image_append_args(ctx, emit_data, target, false, false); } } } @@ -3727,7 +3761,7 @@ } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - emit_waitcnt(ctx); + emit_waitcnt(ctx, VM_CNT); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { load_emit_buffer(ctx, emit_data); @@ -3790,11 +3824,19 @@ offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, ""); buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero, - offset, false); + offset, false, false); } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) { unsigned target = inst->Memory.Texture; LLVMValueRef coords; + /* 8bit/16bit TC L1 write corruption bug on SI. + * All store opcodes not aligned to a dword are affected. + * + * The only way to get unaligned stores in radeonsi is through + * shader images. + */ + bool force_glc = ctx->screen->b.chip_class == SI; + coords = image_fetch_coords(bld_base, inst, 0); if (target == TGSI_TEXTURE_BUFFER) { @@ -3802,14 +3844,14 @@ rsrc = extract_rsrc_top_half(ctx, rsrc); buffer_append_args(ctx, emit_data, rsrc, coords, - bld_base->uint_bld.zero, false); + bld_base->uint_bld.zero, false, force_glc); } else { emit_data->args[1] = coords; image_fetch_rsrc(bld_base, &memory, true, &emit_data->args[2]); emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */ emit_data->arg_count = 4; - image_append_args(ctx, emit_data, target, false); + image_append_args(ctx, emit_data, target, false, force_glc); } } } @@ -3929,7 +3971,7 @@ } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - emit_waitcnt(ctx); + emit_waitcnt(ctx, VM_CNT); if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { store_emit_buffer(ctx, emit_data); @@ -3993,7 +4035,7 @@ offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, ""); buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero, - offset, true); + offset, true, false); } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) { unsigned target = inst->Memory.Texture; LLVMValueRef coords; @@ -4005,12 +4047,12 @@ if (target == TGSI_TEXTURE_BUFFER) { rsrc = extract_rsrc_top_half(ctx, rsrc); buffer_append_args(ctx, emit_data, rsrc, coords, - bld_base->uint_bld.zero, true); + bld_base->uint_bld.zero, true, false); } else { emit_data->args[emit_data->arg_count++] = coords; emit_data->args[emit_data->arg_count++] = rsrc; - image_append_args(ctx, emit_data, target, true); + image_append_args(ctx, emit_data, target, true, false); } } } @@ -5247,6 +5289,7 @@ struct si_shader *shader = ctx->shader; struct tgsi_shader_info *info = &shader->selector->info; struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_if_state if_state; LLVMValueRef soffset = LLVMGetParam(ctx->main_fn, SI_PARAM_GS2VS_OFFSET); LLVMValueRef gs_next_vertex; @@ -5264,19 +5307,28 @@ ""); /* If this thread has already emitted the declared maximum number of - * vertices, kill it: excessive vertex emissions are not supposed to - * have any effect, and GS threads have no externally observable - * effects other than emitting vertices. + * vertices, skip the write: excessive vertex emissions are not + * supposed to have any effect. + * + * If the shader has no writes to memory, kill it instead. This skips + * further memory loads and may allow LLVM to skip to the end + * altogether. */ - can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex, + can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULT, gs_next_vertex, lp_build_const_int32(gallivm, shader->selector->gs_max_out_vertices), ""); - kill = lp_build_select(&bld_base->base, can_emit, - lp_build_const_float(gallivm, 1.0f), - lp_build_const_float(gallivm, -1.0f)); - lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - ctx->voidt, &kill, 1, 0); + bool use_kill = !info->writes_memory; + if (use_kill) { + kill = lp_build_select(&bld_base->base, can_emit, + lp_build_const_float(gallivm, 1.0f), + lp_build_const_float(gallivm, -1.0f)); + + lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", + ctx->voidt, &kill, 1, 0); + } else { + lp_build_if(&if_state, gallivm, can_emit); + } for (i = 0; i < info->num_outputs; i++) { LLVMValueRef *out_ptr = @@ -5302,6 +5354,7 @@ 1, 0, 1, 1, 0); } } + gs_next_vertex = lp_build_add(uint, gs_next_vertex, lp_build_const_int32(gallivm, 1)); @@ -5312,6 +5365,9 @@ args[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID); lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg", ctx->voidt, args, 2, 0); + + if (!use_kill) + lp_build_endif(&if_state); } /* Cut one primitive from the geometry shader */ @@ -5344,7 +5400,7 @@ * always fits into a single wave. */ if (ctx->type == PIPE_SHADER_TESS_CTRL) { - emit_optimization_barrier(ctx); + emit_waitcnt(ctx, LGKM_CNT & VM_CNT); return; } @@ -5481,6 +5537,23 @@ "tess_lds"); } +static unsigned si_get_max_workgroup_size(struct si_shader *shader) +{ + const unsigned *properties = shader->selector->info.properties; + unsigned max_work_group_size = + properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] * + properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] * + properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]; + + if (!max_work_group_size) { + /* This is a variable group size compute shader, + * compile it for the maximum possible group size. + */ + max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK; + } + return max_work_group_size; +} + static void create_function(struct si_shader_context *ctx) { struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base; @@ -5706,22 +5779,9 @@ S_0286D0_FRONT_FACE_ENA(1) | S_0286D0_POS_FIXED_PT_ENA(1)); } else if (ctx->type == PIPE_SHADER_COMPUTE) { - const unsigned *properties = shader->selector->info.properties; - unsigned max_work_group_size = - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] * - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] * - properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]; - - if (!max_work_group_size) { - /* This is a variable group size compute shader, - * compile it for the maximum possible group size. - */ - max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK; - } - si_llvm_add_attribute(ctx->main_fn, "amdgpu-max-work-group-size", - max_work_group_size); + si_get_max_workgroup_size(shader)); } shader->info.num_input_sgprs = 0; @@ -6643,20 +6703,11 @@ * LLVM 3.9svn has this bug. */ if (sel->type == PIPE_SHADER_COMPUTE) { - unsigned *props = sel->info.properties; unsigned wave_size = 64; unsigned max_vgprs = 256; unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512; unsigned max_sgprs_per_wave = 128; - unsigned max_block_threads; - - if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH]) - max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] * - props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] * - props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]; - else - max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK; - + unsigned max_block_threads = si_get_max_workgroup_size(shader); unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size); unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4); @@ -7746,11 +7797,31 @@ return true; } -static void si_fix_num_sgprs(struct si_shader *shader) +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size) +{ + /* SPI barrier management bug: + * Make sure we have at least 4k of LDS in use to avoid the bug. + * It applies to workgroup sizes of more than one wavefront. + */ + if (sscreen->b.family == CHIP_BONAIRE || + sscreen->b.family == CHIP_KABINI || + sscreen->b.family == CHIP_MULLINS) + *lds_size = MAX2(*lds_size, 8); +} + +static void si_fix_resource_usage(struct si_screen *sscreen, + struct si_shader *shader) { unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */ shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); + + if (shader->selector->type == PIPE_SHADER_COMPUTE && + si_get_max_workgroup_size(shader) > 64) { + si_multiwave_lds_size_workaround(sscreen, + &shader->config.lds_size); + } } int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, @@ -7846,7 +7917,7 @@ } } - si_fix_num_sgprs(shader); + si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor, stderr); diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.h mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.h --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.h 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.h 2016-12-14 19:03:11.000000000 +0000 @@ -482,6 +482,8 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, FILE *f); +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, struct si_shader_config *config, diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_state.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_state.c --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_state.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_state.c 2016-12-14 19:03:10.000000000 +0000 @@ -453,8 +453,14 @@ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); /* Only set dual source blending for MRT0 to avoid a hang. */ - if (i >= 1 && blend->dual_src_blend) + if (i >= 1 && blend->dual_src_blend) { + /* Vulkan does this for dual source blending. */ + if (i == 1) + blend_cntl |= S_028780_ENABLE(1); + + si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; + } /* Only addition and subtraction equations are supported with * dual source blending. @@ -463,16 +469,14 @@ (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { assert(!"Unsupported equation for dual source blending"); + si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } - if (!state->rt[j].colormask) - continue; - /* cb_render_state will disable unused ones */ blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); - if (!state->rt[j].blend_enable) { + if (!state->rt[j].colormask || !state->rt[j].blend_enable) { si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } @@ -553,6 +557,17 @@ } if (sctx->b.family == CHIP_STONEY) { + /* Disable RB+ blend optimizations for dual source blending. + * Vulkan does this. + */ + if (blend->dual_src_blend) { + for (int i = 0; i < 8; i++) { + sx_mrt_blend_opt[i] = + S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | + S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); + } + } + for (int i = 0; i < 8; i++) si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]); diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_draw.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_draw.c --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_draw.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_draw.c 2016-12-14 19:03:11.000000000 +0000 @@ -154,6 +154,12 @@ */ *num_patches = MIN2(*num_patches, 40); + /* SI bug workaround - limit LS-HS threadgroups to only one wave. */ + if (sctx->b.chip_class == SI) { + unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); + *num_patches = MIN2(*num_patches, one_wave); + } + output_patch0_offset = input_patch_size * *num_patches; perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; @@ -162,11 +168,13 @@ if (sctx->b.chip_class >= CIK) { assert(lds_size <= 65536); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512); + lds_size = align(lds_size, 512) / 512; } else { assert(lds_size <= 32768); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256); + lds_size = align(lds_size, 256) / 256; } + si_multiwave_lds_size_workaround(sctx->screen, &lds_size); + ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); if (sctx->last_ls == ls->current && sctx->last_tcs == tcs && @@ -284,10 +292,18 @@ /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ if (sctx->screen->has_distributed_tess) { - if (sctx->gs_shader.cso) + if (sctx->gs_shader.cso) { partial_es_wave = true; - else + + /* GPU hang workaround. */ + if (sctx->b.family == CHIP_TONGA || + sctx->b.family == CHIP_FIJI || + sctx->b.family == CHIP_POLARIS10 || + sctx->b.family == CHIP_POLARIS11) + partial_vs_wave = true; + } else { partial_vs_wave = true; + } } } diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_shaders.c --- mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_shaders.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_shaders.c 2016-12-16 14:03:24.000000000 +0000 @@ -1777,10 +1777,15 @@ sctx->esgs_ring, 0, sctx->esgs_ring->width0, false, false, 0, 0, 0); } - if (sctx->gsvs_ring) + if (sctx->gsvs_ring) { si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS, sctx->gsvs_ring, 0, sctx->gsvs_ring->width0, false, false, 0, 0, 0); + + /* Also update SI_GS_RING_GSVSi descriptors. */ + sctx->last_gsvs_itemsize = 0; + } + return true; } diff -Nru mesa-13.0.2/src/gallium/drivers/vc4/vc4_program.c mesa-13.0.3/src/gallium/drivers/vc4/vc4_program.c --- mesa-13.0.2/src/gallium/drivers/vc4/vc4_program.c 2016-11-23 18:56:30.000000000 +0000 +++ mesa-13.0.3/src/gallium/drivers/vc4/vc4_program.c 2016-12-14 19:03:11.000000000 +0000 @@ -1865,22 +1865,29 @@ static void ntq_emit_jump(struct vc4_compile *c, nir_jump_instr *jump) { + struct qblock *jump_block; switch (jump->type) { case nir_jump_break: - qir_SF(c, c->execute); - qir_MOV_cond(c, QPU_COND_ZS, c->execute, - qir_uniform_ui(c, c->loop_break_block->index)); + jump_block = c->loop_break_block; break; - case nir_jump_continue: - qir_SF(c, c->execute); - qir_MOV_cond(c, QPU_COND_ZS, c->execute, - qir_uniform_ui(c, c->loop_cont_block->index)); + jump_block = c->loop_cont_block; break; - - case nir_jump_return: - unreachable("All returns shouold be lowered\n"); + default: + unreachable("Unsupported jump type\n"); } + + qir_SF(c, c->execute); + qir_MOV_cond(c, QPU_COND_ZS, c->execute, + qir_uniform_ui(c, jump_block->index)); + + /* Jump to the destination block if everyone has taken the jump. */ + qir_SF(c, qir_SUB(c, c->execute, qir_uniform_ui(c, jump_block->index))); + qir_BRANCH(c, QPU_COND_BRANCH_ALL_ZS); + struct qblock *new_block = qir_new_block(c); + qir_link_blocks(c->cur_block, jump_block); + qir_link_blocks(c->cur_block, new_block); + qir_set_emit_block(c, new_block); } static void diff -Nru mesa-13.0.2/src/intel/genxml/gen9_pack.h mesa-13.0.3/src/intel/genxml/gen9_pack.h --- mesa-13.0.2/src/intel/genxml/gen9_pack.h 2016-11-28 15:16:41.000000000 +0000 +++ mesa-13.0.3/src/intel/genxml/gen9_pack.h 2017-01-05 15:53:01.000000000 +0000 @@ -8220,7 +8220,6 @@ #define GEN9_MI_SEMAPHORE_WAIT_header \ .CommandType = 0, \ .MICommandOpcode = 28, \ - .RegisterPollMode = 1, \ .DWordLength = 2 struct GEN9_MI_SEMAPHORE_WAIT { @@ -8229,7 +8228,7 @@ uint32_t MemoryType; #define PerProcessGraphicsAddress 0 #define GlobalGraphicsAddress 1 - uint32_t RegisterPollMode; + bool RegisterPollMode; uint32_t WaitMode; #define PollingMode 1 #define SignalMode 0 diff -Nru mesa-13.0.2/src/intel/genxml/gen9.xml mesa-13.0.3/src/intel/genxml/gen9.xml --- mesa-13.0.2/src/intel/genxml/gen9.xml 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/intel/genxml/gen9.xml 2016-12-14 19:03:11.000000000 +0000 @@ -3194,7 +3194,7 @@ - + diff -Nru mesa-13.0.2/src/intel/genxml/gen9_xml.h mesa-13.0.3/src/intel/genxml/gen9_xml.h --- mesa-13.0.2/src/intel/genxml/gen9_xml.h 2016-11-28 15:16:41.000000000 +0000 +++ mesa-13.0.3/src/intel/genxml/gen9_xml.h 2017-01-05 15:53:01.000000000 +0000 @@ -15247,8 +15247,7 @@ 0x72, 0x20, 0x50, 0x6f, 0x6c, 0x6c, 0x20, 0x4d, 0x6f, 0x64, 0x65, 0x22, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x3d, 0x22, 0x31, 0x36, 0x22, 0x20, 0x65, 0x6e, 0x64, 0x3d, 0x22, 0x31, 0x36, 0x22, 0x20, 0x74, 0x79, 0x70, - 0x65, 0x3d, 0x22, 0x75, 0x69, 0x6e, 0x74, 0x22, 0x20, 0x64, 0x65, 0x66, - 0x61, 0x75, 0x6c, 0x74, 0x3d, 0x22, 0x31, 0x22, 0x2f, 0x3e, 0x0a, 0x20, + 0x65, 0x3d, 0x22, 0x62, 0x6f, 0x6f, 0x6c, 0x22, 0x2f, 0x3e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x20, 0x6e, 0x61, 0x6d, 0x65, 0x3d, 0x22, 0x57, 0x61, 0x69, 0x74, 0x20, 0x4d, 0x6f, 0x64, 0x65, 0x22, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x3d, 0x22, 0x31, 0x35, diff -Nru mesa-13.0.2/src/intel/vulkan/anv_allocator.c mesa-13.0.3/src/intel/vulkan/anv_allocator.c --- mesa-13.0.2/src/intel/vulkan/anv_allocator.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/anv_allocator.c 2016-12-14 19:03:10.000000000 +0000 @@ -246,10 +246,12 @@ static uint32_t anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state); -void +VkResult anv_block_pool_init(struct anv_block_pool *pool, struct anv_device *device, uint32_t block_size) { + VkResult result; + assert(util_is_power_of_two(block_size)); pool->device = device; @@ -260,17 +262,23 @@ pool->fd = memfd_create("block pool", MFD_CLOEXEC); if (pool->fd == -1) - return; + return vk_error(VK_ERROR_INITIALIZATION_FAILED); /* Just make it 2GB up-front. The Linux kernel won't actually back it * with pages until we either map and fault on one of them or we use * userptr and send a chunk of it off to the GPU. */ - if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) - return; - - u_vector_init(&pool->mmap_cleanups, - round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128); + if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } + + if (!u_vector_init(&pool->mmap_cleanups, + round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), + 128)) { + result = vk_error(VK_ERROR_INITIALIZATION_FAILED); + goto fail_fd; + } pool->state.next = 0; pool->state.end = 0; @@ -279,6 +287,13 @@ /* Immediately grow the pool so we'll have a backing bo. */ pool->state.end = anv_block_pool_grow(pool, &pool->state); + + return VK_SUCCESS; + + fail_fd: + close(pool->fd); + + return result; } void diff -Nru mesa-13.0.2/src/intel/vulkan/anv_descriptor_set.c mesa-13.0.3/src/intel/vulkan/anv_descriptor_set.c --- mesa-13.0.2/src/intel/vulkan/anv_descriptor_set.c 2016-11-24 16:34:40.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/anv_descriptor_set.c 2016-12-16 14:03:24.000000000 +0000 @@ -498,6 +498,7 @@ struct surface_state_free_list_entry *entry = set->buffer_views[b].surface_state.map; entry->next = pool->surface_state_free_list; + entry->offset = set->buffer_views[b].surface_state.offset; pool->surface_state_free_list = entry; } diff -Nru mesa-13.0.2/src/intel/vulkan/anv_device.c mesa-13.0.3/src/intel/vulkan/anv_device.c --- mesa-13.0.2/src/intel/vulkan/anv_device.c 2016-11-24 16:34:42.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/anv_device.c 2016-12-16 14:03:24.000000000 +0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -162,8 +163,6 @@ device->info.max_cs_threads = max_cs_threads; } - close(fd); - brw_process_intel_debug_variable(); device->compiler = brw_compiler_create(NULL, &device->info); @@ -175,12 +174,15 @@ device->compiler->shader_perf_log = compiler_perf_log; result = anv_init_wsi(device); - if (result != VK_SUCCESS) - goto fail; + if (result != VK_SUCCESS) { + ralloc_free(device->compiler); + goto fail; + } /* XXX: Actually detect bit6 swizzling */ isl_device_init(&device->isl_dev, &device->info, swizzled); + close(fd); return VK_SUCCESS; fail: @@ -527,7 +529,7 @@ .maxGeometryTotalOutputComponents = 1024, .maxFragmentInputComponents = 128, .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = 2, + .maxFragmentDualSrcAttachments = 1, .maxFragmentCombinedOutputResources = 8, .maxComputeSharedMemorySize = 32768, .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, @@ -967,10 +969,10 @@ { ANV_FROM_HANDLE(anv_device, device, _device); - anv_queue_finish(&device->queue); - anv_device_finish_blorp(device); + anv_queue_finish(&device->queue); + #ifdef HAVE_VALGRIND /* We only need to free these to prevent valgrind errors. The backing * BO will go away in a couple of lines so we don't actually leak. @@ -978,22 +980,27 @@ anv_state_pool_free(&device->dynamic_state_pool, device->border_colors); #endif + anv_scratch_pool_finish(device, &device->scratch_pool); + anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size); anv_gem_close(device, device->workaround_bo.gem_handle); - anv_bo_pool_finish(&device->batch_bo_pool); - anv_state_pool_finish(&device->dynamic_state_pool); - anv_block_pool_finish(&device->dynamic_state_block_pool); - anv_state_pool_finish(&device->instruction_state_pool); - anv_block_pool_finish(&device->instruction_block_pool); anv_state_pool_finish(&device->surface_state_pool); anv_block_pool_finish(&device->surface_state_block_pool); - anv_scratch_pool_finish(device, &device->scratch_pool); + anv_state_pool_finish(&device->instruction_state_pool); + anv_block_pool_finish(&device->instruction_block_pool); + anv_state_pool_finish(&device->dynamic_state_pool); + anv_block_pool_finish(&device->dynamic_state_block_pool); - close(device->fd); + anv_bo_pool_finish(&device->batch_bo_pool); + pthread_cond_destroy(&device->queue_submit); pthread_mutex_destroy(&device->mutex); + anv_gem_destroy_context(device, device->context_id); + + close(device->fd); + vk_free(&device->alloc, device); } @@ -1236,6 +1243,9 @@ mem->type_index = pAllocateInfo->memoryTypeIndex; + mem->map = NULL; + mem->map_size = 0; + *pMem = anv_device_memory_to_handle(mem); return VK_SUCCESS; @@ -1257,6 +1267,9 @@ if (mem == NULL) return; + if (mem->map) + anv_UnmapMemory(_device, _mem); + if (mem->bo.map) anv_gem_munmap(mem->bo.map, mem->bo.size); @@ -1303,8 +1316,12 @@ /* Let's map whole pages */ map_size = align_u64(map_size, 4096); - mem->map = anv_gem_mmap(device, mem->bo.gem_handle, - map_offset, map_size, gem_flags); + void *map = anv_gem_mmap(device, mem->bo.gem_handle, + map_offset, map_size, gem_flags); + if (map == MAP_FAILED) + return vk_error(VK_ERROR_MEMORY_MAP_FAILED); + + mem->map = map; mem->map_size = map_size; *ppData = mem->map + (offset - map_offset); @@ -1322,6 +1339,9 @@ return; anv_gem_munmap(mem->map, mem->map_size); + + mem->map = NULL; + mem->map_size = 0; } static void diff -Nru mesa-13.0.2/src/intel/vulkan/anv_gem.c mesa-13.0.3/src/intel/vulkan/anv_gem.c --- mesa-13.0.2/src/intel/vulkan/anv_gem.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/anv_gem.c 2016-12-16 14:03:24.000000000 +0000 @@ -88,10 +88,8 @@ }; int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap); - if (ret != 0) { - /* FIXME: Is NULL the right error return? Cf MAP_INVALID */ - return NULL; - } + if (ret != 0) + return MAP_FAILED; VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1)); return (void *)(uintptr_t) gem_mmap.addr_ptr; diff -Nru mesa-13.0.2/src/intel/vulkan/anv_image.c mesa-13.0.3/src/intel/vulkan/anv_image.c --- mesa-13.0.2/src/intel/vulkan/anv_image.c 2016-11-24 16:34:40.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/anv_image.c 2016-12-14 19:03:10.000000000 +0000 @@ -194,8 +194,8 @@ anv_finishme("Test gen8 multisampled HiZ"); } else { isl_surf_get_hiz_surf(&dev->isl_dev, &image->depth_surface.isl, - &image->hiz_surface.isl); - add_surface(image, &image->hiz_surface); + &image->aux_surface.isl); + add_surface(image, &image->aux_surface); } } @@ -306,16 +306,16 @@ /* The offset and size must be a multiple of 4K or else the * anv_gem_mmap call below will return NULL. */ - assert((image->offset + image->hiz_surface.offset) % 4096 == 0); - assert(image->hiz_surface.isl.size % 4096 == 0); + assert((image->offset + image->aux_surface.offset) % 4096 == 0); + assert(image->aux_surface.isl.size % 4096 == 0); /* HiZ surfaces need to have their memory cleared to 0 before they * can be used. If we let it have garbage data, it can cause GPU * hangs on some hardware. */ void *map = anv_gem_mmap(device, image->bo->gem_handle, - image->offset + image->hiz_surface.offset, - image->hiz_surface.isl.size, + image->offset + image->aux_surface.offset, + image->aux_surface.isl.size, device->info.has_llc ? 0 : I915_MMAP_WC); /* If anv_gem_mmap returns NULL, it's likely that the kernel was @@ -324,9 +324,9 @@ if (map == NULL) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); - memset(map, 0, image->hiz_surface.isl.size); + memset(map, 0, image->aux_surface.isl.size); - anv_gem_munmap(map, image->hiz_surface.isl.size); + anv_gem_munmap(map, image->aux_surface.isl.size); } return VK_SUCCESS; diff -Nru mesa-13.0.2/src/intel/vulkan/anv_private.h mesa-13.0.3/src/intel/vulkan/anv_private.h --- mesa-13.0.2/src/intel/vulkan/anv_private.h 2016-11-23 18:56:30.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/anv_private.h 2016-12-14 19:03:10.000000000 +0000 @@ -416,8 +416,8 @@ anv_clflush_range(state.map, state.alloc_size); } -void anv_block_pool_init(struct anv_block_pool *pool, - struct anv_device *device, uint32_t block_size); +VkResult anv_block_pool_init(struct anv_block_pool *pool, + struct anv_device *device, uint32_t block_size); void anv_block_pool_finish(struct anv_block_pool *pool); int32_t anv_block_pool_alloc(struct anv_block_pool *pool); int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool); @@ -1526,10 +1526,11 @@ struct { struct anv_surface depth_surface; - struct anv_surface hiz_surface; struct anv_surface stencil_surface; }; }; + + struct anv_surface aux_surface; }; static inline uint32_t @@ -1593,11 +1594,11 @@ static inline bool anv_image_has_hiz(const struct anv_image *image) { - /* We must check the aspect because anv_image::hiz_surface belongs to - * a union. + /* We must check the aspect because anv_image::aux_surface may be used for + * any type of auxiliary surface, not just HiZ. */ return (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - image->hiz_surface.isl.size > 0; + image->aux_surface.isl.size > 0; } struct anv_buffer_view { diff -Nru mesa-13.0.2/src/intel/vulkan/anv_timestamp.h mesa-13.0.3/src/intel/vulkan/anv_timestamp.h --- mesa-13.0.2/src/intel/vulkan/anv_timestamp.h 2016-11-28 15:16:44.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/anv_timestamp.h 2017-01-05 15:53:04.000000000 +0000 @@ -1 +1 @@ -#define ANV_TIMESTAMP "1480346204" +#define ANV_TIMESTAMP "1483631584" diff -Nru mesa-13.0.2/src/intel/vulkan/genX_cmd_buffer.c mesa-13.0.3/src/intel/vulkan/genX_cmd_buffer.c --- mesa-13.0.2/src/intel/vulkan/genX_cmd_buffer.c 2016-11-28 12:19:19.000000000 +0000 +++ mesa-13.0.3/src/intel/vulkan/genX_cmd_buffer.c 2016-12-16 14:03:24.000000000 +0000 @@ -1356,22 +1356,13 @@ result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces); assert(result == VK_SUCCESS); } + result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers); assert(result == VK_SUCCESS); - - struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer); - const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_stage_prog_data *prog_data = &cs_prog_data->base; - if (push_state.alloc_size) { - anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) { - curbe.CURBETotalDataLength = push_state.alloc_size; - curbe.CURBEDataStartAddress = push_state.offset; - } - } - const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared); struct anv_state state = @@ -1441,6 +1432,18 @@ cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; } + if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) { + struct anv_state push_state = + anv_cmd_buffer_cs_push_constants(cmd_buffer); + + if (push_state.alloc_size) { + anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) { + curbe.CURBETotalDataLength = push_state.alloc_size; + curbe.CURBEDataStartAddress = push_state.offset; + } + } + } + cmd_buffer->state.compute_dirty = 0; genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); @@ -1796,10 +1799,10 @@ if (has_hiz) { anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hdb) { hdb.HierarchicalDepthBufferObjectControlState = GENX(MOCS); - hdb.SurfacePitch = image->hiz_surface.isl.row_pitch - 1; + hdb.SurfacePitch = image->aux_surface.isl.row_pitch - 1; hdb.SurfaceBaseAddress = (struct anv_address) { .bo = image->bo, - .offset = image->offset + image->hiz_surface.offset, + .offset = image->offset + image->aux_surface.offset, }; #if GEN_GEN >= 8 /* From the SKL PRM Vol2a: @@ -1809,11 +1812,14 @@ * - SURFTYPE_1D: distance in pixels between array slices * - SURFTYPE_2D/CUBE: distance in rows between array slices * - SURFTYPE_3D: distance in rows between R - slices + * + * Unfortunately, the docs aren't 100% accurate here. They fail to + * mention that the 1-D rule only applies to linear 1-D images. + * Since depth and HiZ buffers are always tiled, they are treated as + * 2-D images. Prior to Sky Lake, this field is always in rows. */ hdb.SurfaceQPitch = - image->hiz_surface.isl.dim == ISL_SURF_DIM_1D ? - isl_surf_get_array_pitch_el(&image->hiz_surface.isl) >> 2 : - isl_surf_get_array_pitch_el_rows(&image->hiz_surface.isl) >> 2; + isl_surf_get_array_pitch_el_rows(&image->aux_surface.isl) >> 2; #endif } } else { diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp --- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 2016-12-24 13:03:53.000000000 +0000 @@ -129,7 +129,7 @@ foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) { acp[next_acp] = entry; - /* opt_copy_propagate_local populates out_acp with copies created + /* opt_copy_propagation_local populates out_acp with copies created * in a block which are still live at the end of the block. This * is exactly what we want in the COPY set. */ @@ -431,7 +431,9 @@ if (entry->saturate) { switch(inst->opcode) { case BRW_OPCODE_SEL: - if (inst->src[1].file != IMM || + if ((inst->conditional_mod != BRW_CONDITIONAL_GE && + inst->conditional_mod != BRW_CONDITIONAL_L) || + inst->src[1].file != IMM || inst->src[1].f < 0.0 || inst->src[1].f > 1.0) { return false; @@ -735,8 +737,8 @@ * list. */ bool -fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, - exec_list *acp) +fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, + exec_list *acp) { bool progress = false; @@ -819,7 +821,7 @@ } bool -fs_visitor::opt_copy_propagate() +fs_visitor::opt_copy_propagation() { bool progress = false; void *copy_prop_ctx = ralloc_context(NULL); @@ -832,8 +834,8 @@ * the set of copies available at the end of the block. */ foreach_block (block, cfg) { - progress = opt_copy_propagate_local(copy_prop_ctx, block, - out_acp[block->num]) || progress; + progress = opt_copy_propagation_local(copy_prop_ctx, block, + out_acp[block->num]) || progress; } /* Do dataflow analysis for those available copies. */ @@ -852,7 +854,8 @@ } } - progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress; + progress = opt_copy_propagation_local(copy_prop_ctx, block, in_acp) || + progress; } for (int i = 0; i < cfg->num_blocks; i++) diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.cpp --- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.cpp 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.cpp 2016-12-16 14:03:24.000000000 +0000 @@ -5692,7 +5692,7 @@ OPT(opt_algebraic); OPT(opt_cse); - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); OPT(opt_predicated_break, this); OPT(opt_cmod_propagation); OPT(dead_code_eliminate); @@ -5716,7 +5716,7 @@ } if (OPT(lower_d2x)) { - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); OPT(dead_code_eliminate); } @@ -5728,12 +5728,12 @@ OPT(lower_logical_sends); if (progress) { - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); /* Only run after logical send lowering because it's easier to implement * in terms of physical sends. */ if (OPT(opt_zero_samples)) - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); /* Run after logical send lowering to give it a chance to CSE the * LOAD_PAYLOAD instructions created to construct the payloads of * e.g. texturing messages in cases where it wasn't possible to CSE the @@ -5762,7 +5762,7 @@ if (devinfo->gen <= 5 && OPT(lower_minmax)) { OPT(opt_cmod_propagation); OPT(opt_cse); - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); OPT(dead_code_eliminate); } diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.h mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.h --- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.h 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.h 2016-12-16 14:03:24.000000000 +0000 @@ -133,11 +133,11 @@ bool opt_redundant_discard_jumps(); bool opt_cse(); bool opt_cse_local(bblock_t *block); - bool opt_copy_propagate(); + bool opt_copy_propagation(); bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); bool try_constant_propagate(fs_inst *inst, acp_entry *entry); - bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block, - exec_list *acp); + bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block, + exec_list *acp); bool opt_drop_redundant_mov_to_flags(); bool opt_register_renaming(); bool register_coalesce(); diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp --- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 2016-11-24 16:34:39.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 2016-12-14 19:03:11.000000000 +0000 @@ -780,7 +780,13 @@ if (compiler->devinfo->gen >= 8) output_size_bytes += 32; - assert(output_size_bytes >= 1); + /* Shaders can technically set max_vertices = 0, at which point we + * may have a URB size of 0 bytes. Nothing good can come from that, + * so enforce a minimum size. + */ + if (output_size_bytes == 0) + output_size_bytes = 1; + unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; if (compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/intel_mipmap_tree.c mesa-13.0.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c --- mesa-13.0.2/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2016-11-10 22:06:40.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 2016-12-16 14:03:24.000000000 +0000 @@ -2159,6 +2159,8 @@ intel_miptree_release(&mt->mcs_mt); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; } + + mt->disable_aux_buffers = true; } diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.am mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.am --- mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.am 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.am 2016-12-16 14:03:24.000000000 +0000 @@ -106,6 +106,7 @@ TESTS = \ test_fs_cmod_propagation \ + test_fs_copy_propagation \ test_fs_saturate_propagation \ test_eu_compact \ test_vf_float_conversions \ @@ -121,6 +122,12 @@ $(top_builddir)/src/gtest/libgtest.la \ $(TEST_LIBS) +test_fs_copy_propagation_SOURCES = \ + test_fs_copy_propagation.cpp +test_fs_copy_propagation_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(TEST_LIBS) + test_fs_saturate_propagation_SOURCES = \ test_fs_saturate_propagation.cpp test_fs_saturate_propagation_LDADD = \ diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.in mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.in --- mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.in 2016-11-28 15:16:07.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.in 2017-01-05 15:52:27.000000000 +0000 @@ -111,6 +111,7 @@ host_triplet = @host@ target_triplet = @target@ TESTS = test_fs_cmod_propagation$(EXEEXT) \ + test_fs_copy_propagation$(EXEEXT) \ test_fs_saturate_propagation$(EXEEXT) test_eu_compact$(EXEEXT) \ test_vf_float_conversions$(EXEEXT) \ test_vec4_cmod_propagation$(EXEEXT) \ @@ -262,6 +263,7 @@ $(libi965_gen9_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \ -o $@ am__EXEEXT_1 = test_fs_cmod_propagation$(EXEEXT) \ + test_fs_copy_propagation$(EXEEXT) \ test_fs_saturate_propagation$(EXEEXT) test_eu_compact$(EXEEXT) \ test_vf_float_conversions$(EXEEXT) \ test_vec4_cmod_propagation$(EXEEXT) \ @@ -281,6 +283,12 @@ $(am_test_fs_cmod_propagation_OBJECTS) test_fs_cmod_propagation_DEPENDENCIES = \ $(top_builddir)/src/gtest/libgtest.la $(am__DEPENDENCIES_2) +am_test_fs_copy_propagation_OBJECTS = \ + test_fs_copy_propagation.$(OBJEXT) +test_fs_copy_propagation_OBJECTS = \ + $(am_test_fs_copy_propagation_OBJECTS) +test_fs_copy_propagation_DEPENDENCIES = \ + $(top_builddir)/src/gtest/libgtest.la $(am__DEPENDENCIES_2) am_test_fs_saturate_propagation_OBJECTS = \ test_fs_saturate_propagation.$(OBJEXT) test_fs_saturate_propagation_OBJECTS = \ @@ -369,6 +377,7 @@ $(libi965_gen9_la_SOURCES) $(test_eu_compact_SOURCES) \ $(nodist_EXTRA_test_eu_compact_SOURCES) \ $(test_fs_cmod_propagation_SOURCES) \ + $(test_fs_copy_propagation_SOURCES) \ $(test_fs_saturate_propagation_SOURCES) \ $(test_vec4_cmod_propagation_SOURCES) \ $(test_vec4_copy_propagation_SOURCES) \ @@ -379,6 +388,7 @@ $(libi965_gen7_la_SOURCES) $(libi965_gen75_la_SOURCES) \ $(libi965_gen8_la_SOURCES) $(libi965_gen9_la_SOURCES) \ $(test_eu_compact_SOURCES) $(test_fs_cmod_propagation_SOURCES) \ + $(test_fs_copy_propagation_SOURCES) \ $(test_fs_saturate_propagation_SOURCES) \ $(test_vec4_cmod_propagation_SOURCES) \ $(test_vec4_copy_propagation_SOURCES) \ @@ -1260,6 +1270,13 @@ $(top_builddir)/src/gtest/libgtest.la \ $(TEST_LIBS) +test_fs_copy_propagation_SOURCES = \ + test_fs_copy_propagation.cpp + +test_fs_copy_propagation_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(TEST_LIBS) + test_fs_saturate_propagation_SOURCES = \ test_fs_saturate_propagation.cpp @@ -1385,6 +1402,10 @@ @rm -f test_fs_cmod_propagation$(EXEEXT) $(AM_V_CXXLD)$(CXXLINK) $(test_fs_cmod_propagation_OBJECTS) $(test_fs_cmod_propagation_LDADD) $(LIBS) +test_fs_copy_propagation$(EXEEXT): $(test_fs_copy_propagation_OBJECTS) $(test_fs_copy_propagation_DEPENDENCIES) $(EXTRA_test_fs_copy_propagation_DEPENDENCIES) + @rm -f test_fs_copy_propagation$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(test_fs_copy_propagation_OBJECTS) $(test_fs_copy_propagation_LDADD) $(LIBS) + test_fs_saturate_propagation$(EXEEXT): $(test_fs_saturate_propagation_OBJECTS) $(test_fs_saturate_propagation_DEPENDENCIES) $(EXTRA_test_fs_saturate_propagation_DEPENDENCIES) @rm -f test_fs_saturate_propagation$(EXEEXT) $(AM_V_CXXLD)$(CXXLINK) $(test_fs_saturate_propagation_OBJECTS) $(test_fs_saturate_propagation_LDADD) $(LIBS) @@ -1611,6 +1632,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libi965_gen9_la-genX_blorp_exec.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_eu_compact.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_fs_cmod_propagation.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_fs_copy_propagation.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_fs_saturate_propagation.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_vec4_cmod_propagation.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_vec4_copy_propagation.Po@am__quote@ @@ -1905,6 +1927,13 @@ $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ --log-file $$b.log --trs-file $$b.trs \ $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +test_fs_copy_propagation.log: test_fs_copy_propagation$(EXEEXT) + @p='test_fs_copy_propagation$(EXEEXT)'; \ + b='test_fs_copy_propagation'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ "$$tst" $(AM_TESTS_FD_REDIRECT) test_fs_saturate_propagation.log: test_fs_saturate_propagation$(EXEEXT) @p='test_fs_saturate_propagation$(EXEEXT)'; \ diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp --- mesa-13.0.2/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp 1970-01-01 00:00:00.000000000 +0000 +++ mesa-13.0.3/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp 2016-12-24 13:03:53.000000000 +0000 @@ -0,0 +1,213 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include "brw_fs.h" +#include "brw_cfg.h" +#include "program/program.h" + +using namespace brw; + +class copy_propagation_test : public ::testing::Test { + virtual void SetUp(); + +public: + struct brw_compiler *compiler; + struct gen_device_info *devinfo; + struct gl_context *ctx; + struct brw_wm_prog_data *prog_data; + struct gl_shader_program *shader_prog; + fs_visitor *v; +}; + +class copy_propagation_fs_visitor : public fs_visitor +{ +public: + copy_propagation_fs_visitor(struct brw_compiler *compiler, + struct brw_wm_prog_data *prog_data, + nir_shader *shader) + : fs_visitor(compiler, NULL, NULL, NULL, + &prog_data->base, (struct gl_program *) NULL, + shader, 8, -1) {} +}; + + +void copy_propagation_test::SetUp() +{ + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; + + prog_data = ralloc(NULL, struct brw_wm_prog_data); + nir_shader *shader = + nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL); + + v = new copy_propagation_fs_visitor(compiler, prog_data, shader); + + devinfo->gen = 4; +} + +static fs_inst * +instruction(bblock_t *block, int num) +{ + fs_inst *inst = (fs_inst *)block->start(); + for (int i = 0; i < num; i++) { + inst = (fs_inst *)inst->next; + } + return inst; +} + +static bool +copy_propagation(fs_visitor *v) +{ + const bool print = getenv("TEST_DEBUG"); + + if (print) { + fprintf(stderr, "= Before =\n"); + v->cfg->dump(v); + } + + bool ret = v->opt_copy_propagation(); + + if (print) { + fprintf(stderr, "\n= After =\n"); + v->cfg->dump(v); + } + + return ret; +} + +TEST_F(copy_propagation_test, basic) +{ + const fs_builder &bld = v->bld; + fs_reg vgrf0 = v->vgrf(glsl_type::float_type); + fs_reg vgrf1 = v->vgrf(glsl_type::float_type); + fs_reg vgrf2 = v->vgrf(glsl_type::float_type); + fs_reg vgrf3 = v->vgrf(glsl_type::float_type); + bld.MOV(vgrf0, vgrf2); + bld.ADD(vgrf1, vgrf0, vgrf3); + + /* = Before = + * + * 0: mov(8) vgrf0 vgrf2 + * 1: add(8) vgrf1 vgrf0 vgrf3 + * + * = After = + * 0: mov(8) vgrf0 vgrf2 + * 1: add(8) vgrf1 vgrf2 vgrf3 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(copy_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + fs_inst *mov = instruction(block0, 0); + EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode); + EXPECT_TRUE(mov->dst.equals(vgrf0)); + EXPECT_TRUE(mov->src[0].equals(vgrf2)); + + fs_inst *add = instruction(block0, 1); + EXPECT_EQ(BRW_OPCODE_ADD, add->opcode); + EXPECT_TRUE(add->dst.equals(vgrf1)); + EXPECT_TRUE(add->src[0].equals(vgrf2)); + EXPECT_TRUE(add->src[1].equals(vgrf3)); +} + +TEST_F(copy_propagation_test, maxmax_sat_imm) +{ + const fs_builder &bld = v->bld; + fs_reg vgrf0 = v->vgrf(glsl_type::float_type); + fs_reg vgrf1 = v->vgrf(glsl_type::float_type); + fs_reg vgrf2 = v->vgrf(glsl_type::float_type); + + static const struct { + enum brw_conditional_mod conditional_mod; + float immediate; + bool expected_result; + } test[] = { + /* conditional mod, imm, expected_result */ + { BRW_CONDITIONAL_GE , 0.1f, true }, + { BRW_CONDITIONAL_L , 0.1f, true }, + { BRW_CONDITIONAL_GE , 0.5f, true }, + { BRW_CONDITIONAL_L , 0.5f, true }, + { BRW_CONDITIONAL_GE , 0.9f, true }, + { BRW_CONDITIONAL_L , 0.9f, true }, + { BRW_CONDITIONAL_GE , -1.5f, false }, + { BRW_CONDITIONAL_L , -1.5f, false }, + { BRW_CONDITIONAL_GE , 1.5f, false }, + { BRW_CONDITIONAL_L , 1.5f, false }, + + { BRW_CONDITIONAL_NONE, 0.5f, false }, + { BRW_CONDITIONAL_Z , 0.5f, false }, + { BRW_CONDITIONAL_NZ , 0.5f, false }, + { BRW_CONDITIONAL_G , 0.5f, false }, + { BRW_CONDITIONAL_LE , 0.5f, false }, + { BRW_CONDITIONAL_R , 0.5f, false }, + { BRW_CONDITIONAL_O , 0.5f, false }, + { BRW_CONDITIONAL_U , 0.5f, false }, + }; + + for (unsigned i = 0; i < sizeof(test) / sizeof(test[0]); i++) { + fs_inst *mov = set_saturate(true, bld.MOV(vgrf0, vgrf1)); + fs_inst *sel = set_condmod(test[i].conditional_mod, + bld.SEL(vgrf2, vgrf0, + brw_imm_f(test[i].immediate))); + + v->calculate_cfg(); + + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_EQ(test[i].expected_result, copy_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode); + EXPECT_TRUE(mov->saturate); + EXPECT_TRUE(mov->dst.equals(vgrf0)); + EXPECT_TRUE(mov->src[0].equals(vgrf1)); + + EXPECT_EQ(BRW_OPCODE_SEL, sel->opcode); + EXPECT_EQ(test[i].conditional_mod, sel->conditional_mod); + EXPECT_EQ(test[i].expected_result, sel->saturate); + EXPECT_TRUE(sel->dst.equals(vgrf2)); + if (test[i].expected_result) { + EXPECT_TRUE(sel->src[0].equals(vgrf1)); + } else { + EXPECT_TRUE(sel->src[0].equals(vgrf0)); + } + EXPECT_TRUE(sel->src[1].equals(brw_imm_f(test[i].immediate))); + + delete v->cfg; + v->cfg = NULL; + } +} diff -Nru mesa-13.0.2/src/mesa/main/api_validate.c mesa-13.0.3/src/mesa/main/api_validate.c --- mesa-13.0.2/src/mesa/main/api_validate.c 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/mesa/main/api_validate.c 2016-12-14 19:03:11.000000000 +0000 @@ -925,7 +925,7 @@ * buffer bound. */ if (_mesa_is_gles31(ctx) && - ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexAttribBufferMask) { + ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name); return GL_FALSE; } diff -Nru mesa-13.0.2/src/mesa/main/fbobject.c mesa-13.0.3/src/mesa/main/fbobject.c --- mesa-13.0.2/src/mesa/main/fbobject.c 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/mesa/main/fbobject.c 2016-12-16 14:03:24.000000000 +0000 @@ -2850,6 +2850,7 @@ dst_att->Type = src_att->Type; dst_att->Complete = src_att->Complete; dst_att->TextureLevel = src_att->TextureLevel; + dst_att->CubeMapFace = src_att->CubeMapFace; dst_att->Zoffset = src_att->Zoffset; dst_att->Layered = src_att->Layered; } diff -Nru mesa-13.0.2/src/mesa/main/program_resource.c mesa-13.0.3/src/mesa/main/program_resource.c --- mesa-13.0.2/src/mesa/main/program_resource.c 2016-11-10 22:05:17.000000000 +0000 +++ mesa-13.0.3/src/mesa/main/program_resource.c 2016-12-14 19:03:10.000000000 +0000 @@ -67,9 +67,7 @@ } static struct gl_shader_program * -lookup_linked_program(GLuint program, - const char *caller, - bool raise_link_error) +lookup_linked_program(GLuint program, const char *caller) { GET_CURRENT_CONTEXT(ctx); struct gl_shader_program *prog = @@ -79,66 +77,13 @@ return NULL; if (prog->LinkStatus == GL_FALSE) { - if (raise_link_error) - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(program not linked)", - caller); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(program not linked)", + caller); return NULL; } return prog; } -static GLenum -stage_from_program_interface(GLenum programInterface) -{ - switch(programInterface) { - case GL_VERTEX_SUBROUTINE_UNIFORM: - return MESA_SHADER_VERTEX; - case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: - return MESA_SHADER_TESS_CTRL; - case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: - return MESA_SHADER_TESS_EVAL; - case GL_GEOMETRY_SUBROUTINE_UNIFORM: - return MESA_SHADER_GEOMETRY; - case GL_FRAGMENT_SUBROUTINE_UNIFORM: - return MESA_SHADER_FRAGMENT; - case GL_COMPUTE_SUBROUTINE_UNIFORM: - return MESA_SHADER_COMPUTE; - default: - unreachable("unexpected programInterface value"); - } -} - -static struct gl_linked_shader * -lookup_linked_shader(GLuint program, - GLenum programInterface, - const char *caller) -{ - struct gl_shader_program *shLinkedProg = - lookup_linked_program(program, caller, false); - gl_shader_stage stage = stage_from_program_interface(programInterface); - - if (!shLinkedProg) - return NULL; - - return shLinkedProg->_LinkedShaders[stage]; -} - -static bool -is_subroutine_uniform_program_interface(GLenum programInterface) -{ - switch(programInterface) { - case GL_VERTEX_SUBROUTINE_UNIFORM: - case GL_TESS_CONTROL_SUBROUTINE_UNIFORM: - case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM: - case GL_GEOMETRY_SUBROUTINE_UNIFORM: - case GL_FRAGMENT_SUBROUTINE_UNIFORM: - case GL_COMPUTE_SUBROUTINE_UNIFORM: - return true; - default: - return false; - } -} - void GLAPIENTRY _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, GLenum pname, GLint *params) @@ -174,49 +119,9 @@ /* Validate pname against interface. */ switch(pname) { case GL_ACTIVE_RESOURCES: - if (is_subroutine_uniform_program_interface(programInterface)) { - /* ARB_program_interface_query doesn't explicitly says that those - * uniforms would need a linked shader, or that should fail if it is - * not the case, but Section 7.6 (Uniform Variables) of the OpenGL - * 4.4 Core Profile says: - * - * "A uniform is considered an active uniform if the compiler and - * linker determine that the uniform will actually be accessed - * when the executable code is executed. In cases where the - * compiler and linker cannot make a conclusive determination, - * the uniform will be considered active." - * - * So in order to know the real number of active subroutine uniforms - * we would need a linked shader . - * - * At the same time, Section 7.3 (Program Objects) of the OpenGL 4.4 - * Core Profile says: - * - * "The GL provides various commands allowing applications to - * enumerate and query properties of active variables and in- - * terface blocks for a specified program. If one of these - * commands is called with a program for which LinkProgram - * succeeded, the information recorded when the program was - * linked is returned. If one of these commands is called with a - * program for which LinkProgram failed, no error is generated - * unless otherwise noted." - * - * "If one of these commands is called with a program for which - * LinkProgram had never been called, no error is generated - * unless otherwise noted, and the program object is considered - * to have no active variables or interface blocks." - * - * So if the program is not linked we will return 0. - */ - struct gl_linked_shader *sh = - lookup_linked_shader(program, programInterface, "glGetProgramInterfaceiv"); - - *params = sh ? sh->NumSubroutineUniforms : 0; - } else { - for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) - if (shProg->ProgramResourceList[i].Type == programInterface) - (*params)++; - } + for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) + if (shProg->ProgramResourceList[i].Type == programInterface) + (*params)++; break; case GL_MAX_NAME_LENGTH: if (programInterface == GL_ATOMIC_COUNTER_BUFFER || @@ -500,7 +405,7 @@ } struct gl_shader_program *shProg = - lookup_linked_program(program, "glGetProgramResourceLocation", true); + lookup_linked_program(program, "glGetProgramResourceLocation"); if (!shProg || !name) return -1; @@ -556,7 +461,7 @@ } struct gl_shader_program *shProg = - lookup_linked_program(program, "glGetProgramResourceLocationIndex", true); + lookup_linked_program(program, "glGetProgramResourceLocationIndex"); if (!shProg || !name) return -1; diff -Nru mesa-13.0.2/src/vulkan/wsi/wsi_common_queue.h mesa-13.0.3/src/vulkan/wsi/wsi_common_queue.h --- mesa-13.0.2/src/vulkan/wsi/wsi_common_queue.h 2016-11-28 12:18:12.000000000 +0000 +++ mesa-13.0.3/src/vulkan/wsi/wsi_common_queue.h 2016-12-14 19:03:10.000000000 +0000 @@ -65,6 +65,7 @@ if (ret) goto fail_cond; + pthread_condattr_destroy(&condattr); return 0; fail_cond: diff -Nru mesa-13.0.2/VERSION mesa-13.0.3/VERSION --- mesa-13.0.2/VERSION 2016-11-28 15:02:37.000000000 +0000 +++ mesa-13.0.3/VERSION 2017-01-05 15:49:50.000000000 +0000 @@ -1 +1 @@ -13.0.2 +13.0.3