diff -Nru mesa-13.0.2/configure mesa-13.0.3/configure
--- mesa-13.0.2/configure	2016-11-28 15:16:01.000000000 +0000
+++ mesa-13.0.3/configure	2017-01-05 15:52:21.000000000 +0000
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for Mesa 13.0.2.
+# Generated by GNU Autoconf 2.69 for Mesa 13.0.3.
 #
 # Report bugs to <https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa>.
 #
@@ -591,8 +591,8 @@
 # Identity of this package.
 PACKAGE_NAME='Mesa'
 PACKAGE_TARNAME='mesa'
-PACKAGE_VERSION='13.0.2'
-PACKAGE_STRING='Mesa 13.0.2'
+PACKAGE_VERSION='13.0.3'
+PACKAGE_STRING='Mesa 13.0.3'
 PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa'
 PACKAGE_URL=''
 
@@ -1819,7 +1819,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures Mesa 13.0.2 to adapt to many kinds of systems.
+\`configure' configures Mesa 13.0.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1890,7 +1890,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of Mesa 13.0.2:";;
+     short | recursive ) echo "Configuration of Mesa 13.0.3:";;
    esac
   cat <<\_ACEOF
 
@@ -2226,7 +2226,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-Mesa configure 13.0.2
+Mesa configure 13.0.3
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2945,7 +2945,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by Mesa $as_me 13.0.2, which was
+It was created by Mesa $as_me 13.0.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -3920,7 +3920,7 @@
 
 # Define the identity of the package.
  PACKAGE='mesa'
- VERSION='13.0.2'
+ VERSION='13.0.3'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -29350,7 +29350,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by Mesa $as_me 13.0.2, which was
+This file was extended by Mesa $as_me 13.0.3, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -29407,7 +29407,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-Mesa config.status 13.0.2
+Mesa config.status 13.0.3
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff -Nru mesa-13.0.2/debian/changelog mesa-13.0.3/debian/changelog
--- mesa-13.0.2/debian/changelog	2017-01-14 01:32:35.000000000 +0000
+++ mesa-13.0.3/debian/changelog	2017-01-14 01:32:36.000000000 +0000
@@ -1,3 +1,60 @@
+mesa (13.0.3-1ubuntu2) zesty; urgency=medium
+
+  * rules: Drop forcing -O2 to fix ppc64el ftbfs, gcc got fixed already.
+    (LP: #1605796)
+  * control: Fix dependencies for ubuntuBSD. (LP: #1565080)
+  * compat,rules: Don't bump compat to 10, breaks applying patches
+    before autoreconf which we need for the Mir EGL platform.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Thu, 12 Jan 2017 09:49:20 +0200
+
+mesa (13.0.3-1ubuntu1) zesty; urgency=medium
+
+  * Merge from Debian.
+  * intel: Add patches to support 16k textures. (LP: #1655556)
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Wed, 11 Jan 2017 09:45:19 +0200
+
+mesa (13.0.3-1) unstable; urgency=medium
+
+  [ Timo Aaltonen ]
+  * New upstream release.
+
+  [ Andreas Boll ]
+  * Add vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff: Fixes mpeg2
+    decoding on radeon GPUs without native mpeg2 support (Closes:
+    #846297).  Thanks to Jörg-Volker Peetz for reporting and tracking
+    down this regression!
+  * Add r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff: Fixes
+    GPU hangs on r600g (Evergreen and Northern Islands GPUs) in many
+    games.
+
+ -- Timo Aaltonen <tjaalton@debian.org>  Tue, 10 Jan 2017 17:38:23 +0200
+
+mesa (13.0.2-3) unstable; urgency=medium
+
+  * Fix typo to correctly disable asm on x32. Should fix FTBFS on x32.
+  * Add debian/source/format file.
+
+ -- Andreas Boll <andreas.boll.dev@gmail.com>  Thu, 15 Dec 2016 16:42:25 +0100
+
+mesa (13.0.2-2) unstable; urgency=medium
+
+  * Bump llvm/libclang build-deps to >= 1:3.9.1.
+    - Enables support for OpenCL on AMD Polaris GPUs (Closes: #848173).
+  * Cherry-pick 6dc96de (cso: don't release sampler states that are
+    bound) from upstream master branch. Fixes random GPU hangs on
+    radeonsi (AMD Southern Islands and newer) in many games.
+  * Bump debhelper compat to 10.
+  * Stop passing --disable-silent-rules to configure, debhelper does
+    that for a while.
+  * Drop no longer needed dpkg-dev versioned build-dependency.
+  * Drop redundant cross-build detection, debhelper does this for us
+    already.
+  * Disable assembly usage on x32. Related to Bug #758094.
+
+ -- Andreas Boll <andreas.boll.dev@gmail.com>  Thu, 15 Dec 2016 15:16:56 +0100
+
 mesa (13.0.2-1ubuntu1) zesty; urgency=medium
 
   * Merge from Debian.
diff -Nru mesa-13.0.2/debian/control mesa-13.0.3/debian/control
--- mesa-13.0.2/debian/control	2017-01-14 01:32:35.000000000 +0000
+++ mesa-13.0.3/debian/control	2017-01-14 01:32:36.000000000 +0000
@@ -6,8 +6,7 @@
 Uploaders: Andreas Boll <andreas.boll.dev@gmail.com>
 Standards-Version: 3.9.8
 Build-Depends:
- debhelper (>= 9),
- dh-autoreconf,
+ debhelper (>= 10),
  quilt (>= 0.40),
  pkg-config,
  libdrm-dev (>= 2.4.69) [!hurd-any],
@@ -15,7 +14,6 @@
  x11proto-gl-dev (>= 1.4.14),
  libxxf86vm-dev,
  libexpat1-dev,
- dpkg-dev (>= 1.16.1),
  libgcrypt20-dev [amd64 i386 arm64 armhf ppc64el x32],
  libsensors4-dev [!hurd-any],
  libxfixes-dev,
@@ -40,16 +38,16 @@
  libxcb-randr0-dev,
  libxcb-sync-dev,
  libxshmfence-dev (>= 1.1),
- libmirclient-dev,
- mir-client-platform-mesa-dev,
+ libmirclient-dev [linux-any],
+ mir-client-platform-mesa-dev [linux-any],
  python,
  python-mako,
  flex,
  bison,
- llvm-3.9-dev (>= 1:3.9) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf ppc64el],
+ llvm-3.9-dev (>= 1:3.9.1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf ppc64el],
  libelf-dev [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf ppc64el],
  libwayland-dev (>= 1.2.0) [linux-any],
- libclang-3.9-dev (>= 1:3.9) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf],
+ libclang-3.9-dev (>= 1:3.9.1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf],
  libclc-dev (>= 0.2.0+git20160907) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 arm64 armhf],
 Vcs-Git: https://anonscm.debian.org/git/pkg-xorg/lib/mesa.git
 Vcs-Browser: https://anonscm.debian.org/cgit/pkg-xorg/lib/mesa.git
@@ -157,7 +155,7 @@
  libxcb-sync-dev,
  libxshmfence-dev,
  libx11-xcb-dev,
- libmirclient-dev [!arm64 !powerpc !ppc64 !ppc64el],
+ libmirclient-dev [!arm64 !powerpc !ppc64 !ppc64el !kfreebsd-any !hurd-any],
  libwayland-dev (>= 1.2.0) [linux-any],
  ${misc:Depends},
 Multi-Arch: same
diff -Nru mesa-13.0.2/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch mesa-13.0.3/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch
--- mesa-13.0.2/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,137 @@
+From c64248223dd74ac67eb9033d39d51fc864f48595 Mon Sep 17 00:00:00 2001
+From: Jordan Justen <jordan.l.justen@intel.com>
+Date: Mon, 7 Nov 2016 14:06:49 -0800
+Subject: [PATCH 1/8] intel/blorp_blit: Create structure for src & dst
+ coordinates
+
+This will be useful for splitting blits into smaller sizes.
+
+We also make the coordinates of type double rather than float. Since
+we will be splitting and scaling the coordinates, we might require
+extra precision in the calculations.
+
+v2:
+ * Use double instead of float. (Jason)
+
+Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+(cherry picked from commit b74d4f6ca02715470d8f7726d19aff342873dbc6)
+---
+ src/intel/blorp/blorp_blit.c | 75 +++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 56 insertions(+), 19 deletions(-)
+
+diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
+index 018d997..e5232d4 100644
+--- a/src/intel/blorp/blorp_blit.c
++++ b/src/intel/blorp/blorp_blit.c
+@@ -1433,15 +1433,20 @@ surf_retile_w_to_y(const struct isl_device *isl_dev,
+    info->tile_y_sa /= 2;
+ }
+ 
++struct blt_axis {
++   double src0, src1, dst0, dst1;
++   bool mirror;
++};
++
++struct blt_coords {
++   struct blt_axis x, y;
++};
++
+ static void
+ do_blorp_blit(struct blorp_batch *batch,
+               struct blorp_params *params,
+               struct brw_blorp_blit_prog_key *wm_prog_key,
+-              float src_x0, float src_y0,
+-              float src_x1, float src_y1,
+-              float dst_x0, float dst_y0,
+-              float dst_x1, float dst_y1,
+-              bool mirror_x, bool mirror_y)
++              const struct blt_coords *coords)
+ {
+    const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
+ 
+@@ -1468,15 +1473,19 @@ do_blorp_blit(struct blorp_batch *batch,
+    /* Round floating point values to nearest integer to avoid "off by one texel"
+     * kind of errors when blitting.
+     */
+-   params->x0 = params->wm_inputs.discard_rect.x0 = roundf(dst_x0);
+-   params->y0 = params->wm_inputs.discard_rect.y0 = roundf(dst_y0);
+-   params->x1 = params->wm_inputs.discard_rect.x1 = roundf(dst_x1);
+-   params->y1 = params->wm_inputs.discard_rect.y1 = roundf(dst_y1);
++   params->x0 = params->wm_inputs.discard_rect.x0 = round(coords->x.dst0);
++   params->y0 = params->wm_inputs.discard_rect.y0 = round(coords->y.dst0);
++   params->x1 = params->wm_inputs.discard_rect.x1 = round(coords->x.dst1);
++   params->y1 = params->wm_inputs.discard_rect.y1 = round(coords->y.dst1);
+ 
+    brw_blorp_setup_coord_transform(&params->wm_inputs.coord_transform[0],
+-                                   src_x0, src_x1, dst_x0, dst_x1, mirror_x);
++                                   coords->x.src0, coords->x.src1,
++                                   coords->x.dst0, coords->x.dst1,
++                                   coords->x.mirror);
+    brw_blorp_setup_coord_transform(&params->wm_inputs.coord_transform[1],
+-                                   src_y0, src_y1, dst_y0, dst_y1, mirror_y);
++                                   coords->y.src0, coords->y.src1,
++                                   coords->y.dst0, coords->y.dst1,
++                                   coords->y.mirror);
+ 
+    if (devinfo->gen > 6 &&
+        params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
+@@ -1710,10 +1719,24 @@ blorp_blit(struct blorp_batch *batch,
+       minify(params.src.surf.logical_level0_px.height, src_level) *
+       wm_prog_key.y_scale - 1.0f;
+ 
+-   do_blorp_blit(batch, &params, &wm_prog_key,
+-                 src_x0, src_y0, src_x1, src_y1,
+-                 dst_x0, dst_y0, dst_x1, dst_y1,
+-                 mirror_x, mirror_y);
++   struct blt_coords coords = {
++      .x = {
++         .src0 = src_x0,
++         .src1 = src_x1,
++         .dst0 = dst_x0,
++         .dst1 = dst_x1,
++         .mirror = mirror_x
++      },
++      .y = {
++         .src0 = src_y0,
++         .src1 = src_y1,
++         .dst0 = dst_y0,
++         .dst1 = dst_y1,
++         .mirror = mirror_y
++      }
++   };
++
++   do_blorp_blit(batch, &params, &wm_prog_key, &coords);
+ }
+ 
+ static enum isl_format
+@@ -1891,8 +1914,22 @@ blorp_copy(struct blorp_batch *batch,
+       wm_prog_key.need_dst_offset = true;
+    }
+ 
+-   do_blorp_blit(batch, &params, &wm_prog_key,
+-                 src_x, src_y, src_x + src_width, src_y + src_height,
+-                 dst_x, dst_y, dst_x + dst_width, dst_y + dst_height,
+-                 false, false);
++   struct blt_coords coords = {
++      .x = {
++         .src0 = src_x,
++         .src1 = src_x + src_width,
++         .dst0 = dst_x,
++         .dst1 = dst_x + dst_width,
++         .mirror = false
++      },
++      .y = {
++         .src0 = src_y,
++         .src1 = src_y + src_height,
++         .dst0 = dst_y,
++         .dst1 = dst_y + dst_height,
++         .mirror = false
++      }
++   };
++
++   do_blorp_blit(batch, &params, &wm_prog_key, &coords);
+ }
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch mesa-13.0.3/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch
--- mesa-13.0.2/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,151 @@
+From bae700b49da2752b10cf47ee91685e8f848e0b3a Mon Sep 17 00:00:00 2001
+From: Jordan Justen <jordan.l.justen@intel.com>
+Date: Mon, 7 Nov 2016 14:06:56 -0800
+Subject: [PATCH 2/8] intel/blorp_blit: Split blorp blits if they are too large
+
+We rename do_blorp_blit() to try_blorp_blit(), and add a return error
+if the surface size for the blit is too large. Now, do_blorp_blit() is
+rewritten to try to split the blit into smaller operations if
+try_blorp_blit() fails.
+
+Note: In this commit, try_blorp_blit() will always attempt to blit and
+never return an error, which matches the previous behavior. We will
+enable the size checking and splitting in a future commit.
+
+The motivation for this splitting is that in some cases when we
+flatten an image, it's dimensions grow, and this can then exceed the
+programmable hardware limits. An example is w-tiled+MSAA blits.
+
+v2:
+ * Use double instead of float. (Jason)
+
+Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+(cherry picked from commit 12e0a6e25967e097f9d18e9ee25b30248f617b28)
+---
+ src/intel/blorp/blorp_blit.c | 102 ++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 96 insertions(+), 6 deletions(-)
+
+diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
+index e5232d4..8db8d5e 100644
+--- a/src/intel/blorp/blorp_blit.c
++++ b/src/intel/blorp/blorp_blit.c
+@@ -1442,11 +1442,21 @@ struct blt_coords {
+    struct blt_axis x, y;
+ };
+ 
+-static void
+-do_blorp_blit(struct blorp_batch *batch,
+-              struct blorp_params *params,
+-              struct brw_blorp_blit_prog_key *wm_prog_key,
+-              const struct blt_coords *coords)
++enum blit_shrink_status {
++   BLIT_NO_SHRINK = 0,
++   BLIT_WIDTH_SHRINK = 1,
++   BLIT_HEIGHT_SHRINK = 2,
++};
++
++/* Try to blit. If the surface parameters exceed the size allowed by hardware,
++ * then enum blit_shrink_status will be returned. If BLIT_NO_SHRINK is
++ * returned, then the blit was successful.
++ */
++static enum blit_shrink_status
++try_blorp_blit(struct blorp_batch *batch,
++               struct blorp_params *params,
++               struct brw_blorp_blit_prog_key *wm_prog_key,
++               const struct blt_coords *coords)
+ {
+    const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
+ 
+@@ -1647,7 +1657,87 @@ do_blorp_blit(struct blorp_batch *batch,
+ 
+    brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key);
+ 
+-   batch->blorp->exec(batch, params);
++   unsigned result = 0;
++
++   if (result == 0) {
++      batch->blorp->exec(batch, params);
++   }
++
++   return result;
++}
++
++/* Adjust split blit source coordinates for the current destination
++ * coordinates.
++ */
++static void
++adjust_split_source_coords(const struct blt_axis *orig,
++                           struct blt_axis *split_coords,
++                           double scale)
++{
++   /* When scale is greater than 0, then we are growing from the start, so
++    * src0 uses delta0, and src1 uses delta1. When scale is less than 0, the
++    * source range shrinks from the end. In that case src0 is adjusted by
++    * delta1, and src1 is adjusted by delta0.
++    */
++   double delta0 = scale * (split_coords->dst0 - orig->dst0);
++   double delta1 = scale * (split_coords->dst1 - orig->dst1);
++   split_coords->src0 = orig->src0 + (scale >= 0.0 ? delta0 : delta1);
++   split_coords->src1 = orig->src1 + (scale >= 0.0 ? delta1 : delta0);
++}
++
++static void
++do_blorp_blit(struct blorp_batch *batch,
++              struct blorp_params *params,
++              struct brw_blorp_blit_prog_key *wm_prog_key,
++              const struct blt_coords *orig)
++{
++   struct blt_coords split_coords = *orig;
++   double w = orig->x.dst1 - orig->x.dst0;
++   double h = orig->y.dst1 - orig->y.dst0;
++   double x_scale = (orig->x.src1 - orig->x.src0) / w;
++   double y_scale = (orig->y.src1 - orig->y.src0) / h;
++   if (orig->x.mirror)
++      x_scale = -x_scale;
++   if (orig->y.mirror)
++      y_scale = -y_scale;
++
++   bool x_done, y_done;
++   do {
++      enum blit_shrink_status result =
++         try_blorp_blit(batch, params, wm_prog_key, &split_coords);
++
++      if (result & BLIT_WIDTH_SHRINK) {
++         w /= 2.0;
++         assert(w >= 1.0);
++         split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1);
++         adjust_split_source_coords(&orig->x, &split_coords.x, x_scale);
++      }
++      if (result & BLIT_HEIGHT_SHRINK) {
++         h /= 2.0;
++         assert(h >= 1.0);
++         split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1);
++         adjust_split_source_coords(&orig->y, &split_coords.y, y_scale);
++      }
++
++      if (result != 0)
++         continue;
++
++      y_done = (orig->y.dst1 - split_coords.y.dst1 < 0.5);
++      x_done = y_done && (orig->x.dst1 - split_coords.x.dst1 < 0.5);
++      if (x_done) {
++         break;
++      } else if (y_done) {
++         split_coords.x.dst0 += w;
++         split_coords.x.dst1 = MIN2(split_coords.x.dst0 + w, orig->x.dst1);
++         split_coords.y.dst0 = orig->y.dst0;
++         split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1);
++         adjust_split_source_coords(&orig->x, &split_coords.x, x_scale);
++      } else {
++         split_coords.y.dst0 += h;
++         split_coords.y.dst1 = MIN2(split_coords.y.dst0 + h, orig->y.dst1);
++         adjust_split_source_coords(&orig->y, &split_coords.y, y_scale);
++      }
++   } while (true);
+ }
+ 
+ void
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch mesa-13.0.3/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch
--- mesa-13.0.2/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,160 @@
+From e95729a4cbaaf33f093ff241c58dc584493f5a87 Mon Sep 17 00:00:00 2001
+From: Jordan Justen <jordan.l.justen@intel.com>
+Date: Mon, 7 Nov 2016 14:08:22 -0800
+Subject: [PATCH 3/8] intel/blorp_blit: Adjust blorp surface parameters for
+ split blits
+
+If try_blorp_blit() previously returned that a blit was too large,
+shrink_surface_params() will be used to update the surface parameters
+for the smaller blit so the blit operation can proceed.
+
+v2:
+ * Use double instead of float. (Jason)
+
+Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+(cherry picked from commit edf3113aeddcf66cb24906e53a2d4f41616f8985)
+---
+ src/intel/blorp/blorp_blit.c | 97 ++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 94 insertions(+), 3 deletions(-)
+
+diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
+index 8db8d5e..7c4158f 100644
+--- a/src/intel/blorp/blorp_blit.c
++++ b/src/intel/blorp/blorp_blit.c
+@@ -1433,6 +1433,12 @@ surf_retile_w_to_y(const struct isl_device *isl_dev,
+    info->tile_y_sa /= 2;
+ }
+ 
++static bool
++can_shrink_surfaces(const struct blorp_params *params)
++{
++   return false;
++}
++
+ struct blt_axis {
+    double src0, src1, dst0, dst1;
+    bool mirror;
+@@ -1685,12 +1691,88 @@ adjust_split_source_coords(const struct blt_axis *orig,
+    split_coords->src1 = orig->src1 + (scale >= 0.0 ? delta1 : delta0);
+ }
+ 
++static const struct isl_extent2d
++get_px_size_sa(const struct isl_surf *surf)
++{
++   static const struct isl_extent2d one_to_one = { .w = 1, .h = 1 };
++
++   if (surf->msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED)
++      return one_to_one;
++   else
++      return isl_get_interleaved_msaa_px_size_sa(surf->samples);
++}
++
++static void
++shrink_surface_params(const struct isl_device *dev,
++                      struct brw_blorp_surface_info *info,
++                      double *x0, double *x1, double *y0, double *y1)
++{
++   uint32_t byte_offset, x_offset_sa, y_offset_sa, size;
++   struct isl_extent2d px_size_sa;
++   int adjust;
++
++   surf_convert_to_single_slice(dev, info);
++
++   px_size_sa = get_px_size_sa(&info->surf);
++
++   /* Because this gets called after we lower compressed images, the tile
++    * offsets may be non-zero and we need to incorporate them in our
++    * calculations.
++    */
++   x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa;
++   y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa;
++   isl_tiling_get_intratile_offset_sa(dev, info->surf.tiling,
++                                      info->surf.format, info->surf.row_pitch,
++                                      x_offset_sa, y_offset_sa,
++                                      &byte_offset,
++                                      &info->tile_x_sa, &info->tile_y_sa);
++
++   info->addr.offset += byte_offset;
++
++   adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0;
++   *x0 += adjust;
++   *x1 += adjust;
++   info->tile_x_sa = 0;
++
++   adjust = (int)info->tile_y_sa / px_size_sa.h - (int)*y0;
++   *y0 += adjust;
++   *y1 += adjust;
++   info->tile_y_sa = 0;
++
++   size = MIN2((uint32_t)ceil(*x1), info->surf.logical_level0_px.width);
++   info->surf.logical_level0_px.width = size;
++   info->surf.phys_level0_sa.width = size * px_size_sa.w;
++
++   size = MIN2((uint32_t)ceil(*y1), info->surf.logical_level0_px.height);
++   info->surf.logical_level0_px.height = size;
++   info->surf.phys_level0_sa.height = size * px_size_sa.h;
++}
++
++static void
++shrink_surfaces(const struct isl_device *dev,
++                struct blorp_params *params,
++                struct brw_blorp_blit_prog_key *wm_prog_key,
++                struct blt_coords *coords)
++{
++   /* Shrink source surface */
++   shrink_surface_params(dev, &params->src, &coords->x.src0, &coords->x.src1,
++                         &coords->y.src0, &coords->y.src1);
++   wm_prog_key->need_src_offset = false;
++
++   /* Shrink destination surface */
++   shrink_surface_params(dev, &params->dst, &coords->x.dst0, &coords->x.dst1,
++                         &coords->y.dst0, &coords->y.dst1);
++   wm_prog_key->need_dst_offset = false;
++}
++
+ static void
+ do_blorp_blit(struct blorp_batch *batch,
+-              struct blorp_params *params,
++              const struct blorp_params *orig_params,
+               struct brw_blorp_blit_prog_key *wm_prog_key,
+               const struct blt_coords *orig)
+ {
++   struct blorp_params params;
++   struct blt_coords blit_coords;
+    struct blt_coords split_coords = *orig;
+    double w = orig->x.dst1 - orig->x.dst0;
+    double h = orig->y.dst1 - orig->y.dst0;
+@@ -1702,9 +1784,15 @@ do_blorp_blit(struct blorp_batch *batch,
+       y_scale = -y_scale;
+ 
+    bool x_done, y_done;
++   bool shrink = false;
+    do {
++      params = *orig_params;
++      blit_coords = split_coords;
++      if (shrink)
++         shrink_surfaces(batch->blorp->isl_dev, &params, wm_prog_key,
++                         &blit_coords);
+       enum blit_shrink_status result =
+-         try_blorp_blit(batch, params, wm_prog_key, &split_coords);
++         try_blorp_blit(batch, &params, wm_prog_key, &blit_coords);
+ 
+       if (result & BLIT_WIDTH_SHRINK) {
+          w /= 2.0;
+@@ -1719,8 +1807,11 @@ do_blorp_blit(struct blorp_batch *batch,
+          adjust_split_source_coords(&orig->y, &split_coords.y, y_scale);
+       }
+ 
+-      if (result != 0)
++      if (result != 0) {
++         assert(can_shrink_surfaces(orig_params));
++         shrink = true;
+          continue;
++      }
+ 
+       y_done = (orig->y.dst1 - split_coords.y.dst1 < 0.5);
+       x_done = y_done && (orig->x.dst1 - split_coords.x.dst1 < 0.5);
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch mesa-13.0.3/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch
--- mesa-13.0.2/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,140 @@
+From 51e1943f1b4a6a3412aeb5f90ffe24ec17aaa213 Mon Sep 17 00:00:00 2001
+From: Jason Ekstrand <jason.ekstrand@intel.com>
+Date: Wed, 26 Oct 2016 01:58:16 -0700
+Subject: [PATCH 4/8] intel/blorp: Always use UINT formats on SKL+
+
+Many of these UINT formats aren't available prior to Sky Lake so we used
+UNORM formats.  Using UINT formats is a bit nicer because it guarantees we
+don't run into rounding issues.  Also, we will need it in the next commit
+for handling copies with CCS enabled.
+
+Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
+(cherry picked from commit 89f9c46a74ede69148bb5431a0deb8f09f3fa278)
+---
+ src/intel/blorp/blorp_blit.c | 66 +++++++++++++++++++++++++++++---------------
+ 1 file changed, 44 insertions(+), 22 deletions(-)
+
+diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
+index 7c4158f..f8c5d4c 100644
+--- a/src/intel/blorp/blorp_blit.c
++++ b/src/intel/blorp/blorp_blit.c
+@@ -1921,32 +1921,47 @@ blorp_blit(struct blorp_batch *batch,
+ }
+ 
+ static enum isl_format
+-get_copy_format_for_bpb(unsigned bpb)
++get_copy_format_for_bpb(const struct isl_device *isl_dev, unsigned bpb)
+ {
+-   /* The choice of UNORM and UINT formats is very intentional here.  Most of
+-    * the time, we want to use a UINT format to avoid any rounding error in
+-    * the blit.  For stencil blits, R8_UINT is required by the hardware.
++   /* The choice of UNORM and UINT formats is very intentional here.  Most
++    * of the time, we want to use a UINT format to avoid any rounding error
++    * in the blit.  For stencil blits, R8_UINT is required by the hardware.
+     * (It's the only format allowed in conjunction with W-tiling.)  Also we
+     * intentionally use the 4-channel formats whenever we can.  This is so
+-    * that, when we do a RGB <-> RGBX copy, the two formats will line up even
+-    * though one of them is 3/4 the size of the other.  The choice of UNORM
+-    * vs. UINT is also very intentional because Haswell doesn't handle 8 or
+-    * 16-bit RGB UINT formats at all so we have to use UNORM there.
++    * that, when we do a RGB <-> RGBX copy, the two formats will line up
++    * even though one of them is 3/4 the size of the other.  The choice of
++    * UNORM vs. UINT is also very intentional because we don't have 8 or
++    * 16-bit RGB UINT formats until Sky Lake so we have to use UNORM there.
+     * Fortunately, the only time we should ever use two different formats in
+     * the table below is for RGB -> RGBA blits and so we will never have any
+     * UNORM/UINT mismatch.
+     */
+-   switch (bpb) {
+-   case 8:  return ISL_FORMAT_R8_UINT;
+-   case 16: return ISL_FORMAT_R8G8_UINT;
+-   case 24: return ISL_FORMAT_R8G8B8_UNORM;
+-   case 32: return ISL_FORMAT_R8G8B8A8_UNORM;
+-   case 48: return ISL_FORMAT_R16G16B16_UNORM;
+-   case 64: return ISL_FORMAT_R16G16B16A16_UNORM;
+-   case 96: return ISL_FORMAT_R32G32B32_UINT;
+-   case 128:return ISL_FORMAT_R32G32B32A32_UINT;
+-   default:
+-      unreachable("Unknown format bpb");
++   if (ISL_DEV_GEN(isl_dev) >= 9) {
++      switch (bpb) {
++      case 8:  return ISL_FORMAT_R8_UINT;
++      case 16: return ISL_FORMAT_R8G8_UINT;
++      case 24: return ISL_FORMAT_R8G8B8_UINT;
++      case 32: return ISL_FORMAT_R8G8B8A8_UINT;
++      case 48: return ISL_FORMAT_R16G16B16_UINT;
++      case 64: return ISL_FORMAT_R16G16B16A16_UINT;
++      case 96: return ISL_FORMAT_R32G32B32_UINT;
++      case 128:return ISL_FORMAT_R32G32B32A32_UINT;
++      default:
++         unreachable("Unknown format bpb");
++      }
++   } else {
++      switch (bpb) {
++      case 8:  return ISL_FORMAT_R8_UINT;
++      case 16: return ISL_FORMAT_R8G8_UINT;
++      case 24: return ISL_FORMAT_R8G8B8_UNORM;
++      case 32: return ISL_FORMAT_R8G8B8A8_UNORM;
++      case 48: return ISL_FORMAT_R16G16B16_UNORM;
++      case 64: return ISL_FORMAT_R16G16B16A16_UNORM;
++      case 96: return ISL_FORMAT_R32G32B32_UINT;
++      case 128:return ISL_FORMAT_R32G32B32A32_UINT;
++      default:
++         unreachable("Unknown format bpb");
++      }
+    }
+ }
+ 
+@@ -2002,7 +2017,7 @@ surf_convert_to_uncompressed(const struct isl_device *isl_dev,
+    info->tile_y_sa /= fmtl->bh;
+ 
+    /* It's now an uncompressed surface so we need an uncompressed format */
+-   info->surf.format = get_copy_format_for_bpb(fmtl->bpb);
++   info->surf.format = get_copy_format_for_bpb(isl_dev, fmtl->bpb);
+ }
+ 
+ static void
+@@ -2022,9 +2037,15 @@ surf_fake_rgb_with_red(const struct isl_device *isl_dev,
+    case ISL_FORMAT_R8G8B8_UNORM:
+       red_format = ISL_FORMAT_R8_UNORM;
+       break;
++   case ISL_FORMAT_R8G8B8_UINT:
++      red_format = ISL_FORMAT_R8_UINT;
++      break;
+    case ISL_FORMAT_R16G16B16_UNORM:
+       red_format = ISL_FORMAT_R16_UNORM;
+       break;
++   case ISL_FORMAT_R16G16B16_UINT:
++      red_format = ISL_FORMAT_R16_UINT;
++      break;
+    case ISL_FORMAT_R32G32B32_UINT:
+       red_format = ISL_FORMAT_R32_UINT;
+       break;
+@@ -2049,6 +2070,7 @@ blorp_copy(struct blorp_batch *batch,
+            uint32_t dst_x, uint32_t dst_y,
+            uint32_t src_width, uint32_t src_height)
+ {
++   const struct isl_device *isl_dev = batch->blorp->isl_dev;
+    struct blorp_params params;
+ 
+    if (src_width == 0 || src_height == 0)
+@@ -2068,14 +2090,14 @@ blorp_copy(struct blorp_batch *batch,
+    const struct isl_format_layout *dst_fmtl =
+       isl_format_get_layout(params.dst.surf.format);
+ 
+-   params.src.view.format = get_copy_format_for_bpb(src_fmtl->bpb);
++   params.src.view.format = get_copy_format_for_bpb(isl_dev, src_fmtl->bpb);
+    if (src_fmtl->bw > 1 || src_fmtl->bh > 1) {
+       surf_convert_to_uncompressed(batch->blorp->isl_dev, &params.src,
+                                    &src_x, &src_y, &src_width, &src_height);
+       wm_prog_key.need_src_offset = true;
+    }
+ 
+-   params.dst.view.format = get_copy_format_for_bpb(dst_fmtl->bpb);
++   params.dst.view.format = get_copy_format_for_bpb(isl_dev, dst_fmtl->bpb);
+    if (dst_fmtl->bw > 1 || dst_fmtl->bh > 1) {
+       surf_convert_to_uncompressed(batch->blorp->isl_dev, &params.dst,
+                                    &dst_x, &dst_y, NULL, NULL);
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch mesa-13.0.3/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch
--- mesa-13.0.2/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,173 @@
+From bf42682940d3dff85acbadebbabe82e3b73bb8d4 Mon Sep 17 00:00:00 2001
+From: Jordan Justen <jordan.l.justen@intel.com>
+Date: Wed, 30 Nov 2016 15:53:48 -0800
+Subject: [PATCH 5/8] intel/blorp_blit: Move RGB=>R conversion to follow blit
+ splitting
+
+In blorp_copy, when RGB surfaces are copied, we convert the
+destination surface to a Red only surface, but 3 times as wide. This
+introduces an implicit restriction of "mod 3" for the destination
+width.
+
+It is easier to handle the blorp split buffer offsetting with the
+original RGB surface, and do the RGB=>R after this.
+
+Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
+Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+(cherry picked from commit efea8e724458f6a388fb70421db3e655719fffb0)
+---
+ src/intel/blorp/blorp_blit.c | 113 +++++++++++++++++++++++++------------------
+ 1 file changed, 65 insertions(+), 48 deletions(-)
+
+diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
+index f8c5d4c..662dab0 100644
+--- a/src/intel/blorp/blorp_blit.c
++++ b/src/intel/blorp/blorp_blit.c
+@@ -1448,6 +1448,68 @@ struct blt_coords {
+    struct blt_axis x, y;
+ };
+ 
++static void
++surf_fake_rgb_with_red(const struct isl_device *isl_dev,
++                       struct brw_blorp_surface_info *info,
++                       uint32_t *x, uint32_t *width)
++{
++   surf_convert_to_single_slice(isl_dev, info);
++
++   info->surf.logical_level0_px.width *= 3;
++   info->surf.phys_level0_sa.width *= 3;
++   *x *= 3;
++   *width *= 3;
++
++   enum isl_format red_format;
++   switch (info->view.format) {
++   case ISL_FORMAT_R8G8B8_UNORM:
++      red_format = ISL_FORMAT_R8_UNORM;
++      break;
++   case ISL_FORMAT_R8G8B8_UINT:
++      red_format = ISL_FORMAT_R8_UINT;
++      break;
++   case ISL_FORMAT_R16G16B16_UNORM:
++      red_format = ISL_FORMAT_R16_UNORM;
++      break;
++   case ISL_FORMAT_R16G16B16_UINT:
++      red_format = ISL_FORMAT_R16_UINT;
++      break;
++   case ISL_FORMAT_R32G32B32_UINT:
++      red_format = ISL_FORMAT_R32_UINT;
++      break;
++   default:
++      unreachable("Invalid RGB copy destination format");
++   }
++   assert(isl_format_get_layout(red_format)->channels.r.type ==
++          isl_format_get_layout(info->view.format)->channels.r.type);
++   assert(isl_format_get_layout(red_format)->channels.r.bits ==
++          isl_format_get_layout(info->view.format)->channels.r.bits);
++
++   info->surf.format = info->view.format = red_format;
++}
++
++static void
++fake_dest_rgb_with_red(const struct isl_device *dev,
++                       struct blorp_params *params,
++                       struct brw_blorp_blit_prog_key *wm_prog_key,
++                       struct blt_coords *coords)
++{
++   /* Handle RGB destinations for blorp_copy */
++   const struct isl_format_layout *dst_fmtl =
++      isl_format_get_layout(params->dst.surf.format);
++
++   if (dst_fmtl->bpb % 3 == 0) {
++      uint32_t dst_x = coords->x.dst0;
++      uint32_t dst_width = coords->x.dst1 - dst_x;
++      surf_fake_rgb_with_red(dev, &params->dst,
++                             &dst_x, &dst_width);
++      coords->x.dst0 = dst_x;
++      coords->x.dst1 = dst_x + dst_width;
++      wm_prog_key->dst_rgb = true;
++      wm_prog_key->need_dst_offset = true;
++   }
++}
++
+ enum blit_shrink_status {
+    BLIT_NO_SHRINK = 0,
+    BLIT_WIDTH_SHRINK = 1,
+@@ -1462,10 +1524,12 @@ static enum blit_shrink_status
+ try_blorp_blit(struct blorp_batch *batch,
+                struct blorp_params *params,
+                struct brw_blorp_blit_prog_key *wm_prog_key,
+-               const struct blt_coords *coords)
++               struct blt_coords *coords)
+ {
+    const struct gen_device_info *devinfo = batch->blorp->isl_dev->info;
+ 
++   fake_dest_rgb_with_red(batch->blorp->isl_dev, params, wm_prog_key, coords);
++
+    if (isl_format_has_sint_channel(params->src.view.format)) {
+       wm_prog_key->texture_data_type = nir_type_int;
+    } else if (isl_format_has_uint_channel(params->src.view.format)) {
+@@ -2020,46 +2084,6 @@ surf_convert_to_uncompressed(const struct isl_device *isl_dev,
+    info->surf.format = get_copy_format_for_bpb(isl_dev, fmtl->bpb);
+ }
+ 
+-static void
+-surf_fake_rgb_with_red(const struct isl_device *isl_dev,
+-                       struct brw_blorp_surface_info *info,
+-                       uint32_t *x, uint32_t *width)
+-{
+-   surf_convert_to_single_slice(isl_dev, info);
+-
+-   info->surf.logical_level0_px.width *= 3;
+-   info->surf.phys_level0_sa.width *= 3;
+-   *x *= 3;
+-   *width *= 3;
+-
+-   enum isl_format red_format;
+-   switch (info->view.format) {
+-   case ISL_FORMAT_R8G8B8_UNORM:
+-      red_format = ISL_FORMAT_R8_UNORM;
+-      break;
+-   case ISL_FORMAT_R8G8B8_UINT:
+-      red_format = ISL_FORMAT_R8_UINT;
+-      break;
+-   case ISL_FORMAT_R16G16B16_UNORM:
+-      red_format = ISL_FORMAT_R16_UNORM;
+-      break;
+-   case ISL_FORMAT_R16G16B16_UINT:
+-      red_format = ISL_FORMAT_R16_UINT;
+-      break;
+-   case ISL_FORMAT_R32G32B32_UINT:
+-      red_format = ISL_FORMAT_R32_UINT;
+-      break;
+-   default:
+-      unreachable("Invalid RGB copy destination format");
+-   }
+-   assert(isl_format_get_layout(red_format)->channels.r.type ==
+-          isl_format_get_layout(info->view.format)->channels.r.type);
+-   assert(isl_format_get_layout(red_format)->channels.r.bits ==
+-          isl_format_get_layout(info->view.format)->channels.r.bits);
+-
+-   info->surf.format = info->view.format = red_format;
+-}
+-
+ void
+ blorp_copy(struct blorp_batch *batch,
+            const struct blorp_surf *src_surf,
+@@ -2110,13 +2134,6 @@ blorp_copy(struct blorp_batch *batch,
+    uint32_t dst_width = src_width;
+    uint32_t dst_height = src_height;
+ 
+-   if (dst_fmtl->bpb % 3 == 0) {
+-      surf_fake_rgb_with_red(batch->blorp->isl_dev, &params.dst,
+-                             &dst_x, &dst_width);
+-      wm_prog_key.dst_rgb = true;
+-      wm_prog_key.need_dst_offset = true;
+-   }
+-
+    struct blt_coords coords = {
+       .x = {
+          .src0 = src_x,
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch mesa-13.0.3/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch
--- mesa-13.0.2/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,87 @@
+From 540db63cab001051a997e69573e6a441a6133763 Mon Sep 17 00:00:00 2001
+From: Jordan Justen <jordan.l.justen@intel.com>
+Date: Mon, 7 Nov 2016 14:07:07 -0800
+Subject: [PATCH 6/8] intel/blorp_blit: Enable splitting large blorp blits
+
+Detect when the surface sizes are too large for a blorp blit. When it
+is too large, the blorp blit will be split into a smaller operation
+and attempted again.
+
+For gen7, this fixes the cts test:
+
+ES3-CTS.gtf.GL3Tests.framebuffer_blit.framebuffer_blit_functionality_multisampled_to_singlesampled_blit
+
+It will also enable us to increase our renderable size from 8k x 8k to
+16k x 16k.
+
+Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+(cherry picked from commit da381ae6475dfd35f1ab8c6063b4dce368ef7588)
+---
+ src/intel/blorp/blorp_blit.c | 41 ++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 40 insertions(+), 1 deletion(-)
+
+diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
+index 662dab0..51241cd 100644
+--- a/src/intel/blorp/blorp_blit.c
++++ b/src/intel/blorp/blorp_blit.c
+@@ -1434,9 +1434,41 @@ surf_retile_w_to_y(const struct isl_device *isl_dev,
+ }
+ 
+ static bool
++can_shrink_surface(const struct brw_blorp_surface_info *surf)
++{
++   /* The current code doesn't support offsets into the aux buffers. This
++    * should be possible, but we need to make sure the offset is page
++    * aligned for both the surface and the aux buffer surface. Generally
++    * this mean using the page aligned offset for the aux buffer.
++    *
++    * Currently the cases where we must split the blit are limited to cases
++    * where we don't have a aux buffer.
++    */
++   if (surf->aux_addr.buffer != NULL)
++      return false;
++
++   /* We can't support splitting the blit for gen <= 7, because the qpitch
++    * size is calculated by the hardware based on the surface height for
++    * gen <= 7. In gen >= 8, the qpitch is controlled by the driver.
++    */
++   if (surf->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY)
++      return false;
++
++   return true;
++}
++
++static bool
+ can_shrink_surfaces(const struct blorp_params *params)
+ {
+-   return false;
++   return
++      can_shrink_surface(&params->src) &&
++      can_shrink_surface(&params->dst);
++}
++
++static unsigned
++get_max_surface_size()
++{
++   return 16384;
+ }
+ 
+ struct blt_axis {
+@@ -1728,6 +1760,13 @@ try_blorp_blit(struct blorp_batch *batch,
+    brw_blorp_get_blit_kernel(batch->blorp, params, wm_prog_key);
+ 
+    unsigned result = 0;
++   unsigned max_surface_size = get_max_surface_size(devinfo, params);
++   if (params->src.surf.logical_level0_px.width > max_surface_size ||
++       params->dst.surf.logical_level0_px.width > max_surface_size)
++      result |= BLIT_WIDTH_SHRINK;
++   if (params->src.surf.logical_level0_px.height > max_surface_size ||
++       params->dst.surf.logical_level0_px.height > max_surface_size)
++      result |= BLIT_HEIGHT_SHRINK;
+ 
+    if (result == 0) {
+       batch->blorp->exec(batch, params);
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch mesa-13.0.3/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch
--- mesa-13.0.2/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,57 @@
+From be1dd376753de6759aeb0638793b1d9bf3d1cad1 Mon Sep 17 00:00:00 2001
+From: Jordan Justen <jordan.l.justen@intel.com>
+Date: Tue, 22 Nov 2016 17:20:42 -0800
+Subject: [PATCH 7/8] intel/blorp_blit: Add split_blorp_blit_debug switch
+
+Enabling this debug switch causes surface shrinking to happen by
+default, and lowers the surface size limit which causes blorp blits to
+be split.
+
+Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+(cherry picked from commit d6526d724765e14fc9bb25cd2a53463a4d1c5fff)
+---
+ src/intel/blorp/blorp_blit.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
+index 51241cd..ae03680 100644
+--- a/src/intel/blorp/blorp_blit.c
++++ b/src/intel/blorp/blorp_blit.c
+@@ -28,6 +28,8 @@
+ 
+ #define FILE_DEBUG_FLAG DEBUG_BLORP
+ 
++static const bool split_blorp_blit_debug = false;
++
+ /**
+  * Enum to specify the order of arguments in a sampler message
+  */
+@@ -1466,9 +1468,13 @@ can_shrink_surfaces(const struct blorp_params *params)
+ }
+ 
+ static unsigned
+-get_max_surface_size()
++get_max_surface_size(const struct gen_device_info *devinfo,
++                     const struct blorp_params *params)
+ {
+-   return 16384;
++   if (split_blorp_blit_debug && can_shrink_surfaces(params))
++      return 16384 >> 4; /* A smaller restriction when debug is enabled */
++   else
++      return 16384;
+ }
+ 
+ struct blt_axis {
+@@ -1887,7 +1893,7 @@ do_blorp_blit(struct blorp_batch *batch,
+       y_scale = -y_scale;
+ 
+    bool x_done, y_done;
+-   bool shrink = false;
++   bool shrink = split_blorp_blit_debug && can_shrink_surfaces(orig_params);
+    do {
+       params = *orig_params;
+       blit_coords = split_coords;
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch mesa-13.0.3/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch
--- mesa-13.0.2/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/0008-i965-Increase-max-texture-to-16k-for-gen7.patch	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,45 @@
+From 9a19d5a37ae479767d56706275cbe7cc9770d5c7 Mon Sep 17 00:00:00 2001
+From: Jordan Justen <jordan.l.justen@intel.com>
+Date: Thu, 3 Nov 2016 12:20:19 -0700
+Subject: [PATCH 8/8] i965: Increase max texture to 16k for gen7+
+
+Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98297
+Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+(cherry picked from commit e9133dd90ec498cfb6a23fa22504e06488352c51)
+---
+ src/mesa/drivers/dri/i965/brw_context.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
+index d6204fd..0ccc574 100644
+--- a/src/mesa/drivers/dri/i965/brw_context.c
++++ b/src/mesa/drivers/dri/i965/brw_context.c
+@@ -523,14 +523,21 @@ brw_initialize_context_constants(struct brw_context *brw)
+ 
+    ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+    ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
+-   ctx->Const.MaxRenderbufferSize = 8192;
+-   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
++   if (brw->gen >= 7) {
++      ctx->Const.MaxRenderbufferSize = 16384;
++      ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
++      ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
++   } else {
++      ctx->Const.MaxRenderbufferSize = 8192;
++      ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
++      ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
++   }
+    ctx->Const.Max3DTextureLevels = 12; /* 2048 */
+-   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
+    ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
+    ctx->Const.MaxTextureMbytes = 1536;
+    ctx->Const.MaxTextureRectSize = 1 << 12;
+    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
++   ctx->Const.MaxTextureLodBias = 15.0;
+    ctx->Const.StripTextureBorder = true;
+    if (brw->gen >= 7)
+       ctx->Const.MaxProgramTextureGatherComponents = 4;
+-- 
+2.7.4
+
diff -Nru mesa-13.0.2/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff mesa-13.0.3/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff
--- mesa-13.0.2/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,387 @@
+commit e933246013eef376804662f3fcf4646c143c6c88
+Author: Heiko Przybyl <lil_tux@web.de>
+Date:   Sun Nov 20 14:42:28 2016 +0100
+
+    r600/sb: Fix loop optimization related hangs on eg
+    
+    Make sure unused ops and their references are removed, prior to entering
+    the GCM (global code motion) pass, to stop GCM from breaking the loop
+    logic and thus hanging the GPU.
+    
+    Turns out, that sb has problems with loops and node optimizations
+    regarding associative folding:
+    
+    - the global code motion (gcm) pass moves ops up a loop level/basic block
+    until they've fulfilled their total usage count
+    - if there are ops folded into others, the usage count won't be
+    fulfilled and thus the op moved way up to the top
+    - within GCM the op would be visited and their deps would be moved
+    alongside it, to fulfill the src constaints
+    - in a loop, an unused op is moved out of the loop and GCM would move
+    the src value ops up as well
+    - now here arises the problem: if the loop counter is one of the src
+    values it would get moved up as well, the loop break condition would
+    never get hit and the shader turn into an endless loop, resulting in the
+    GPU hanging and being reset
+    
+    A reduced (albeit nonsense) piglit example would be:
+    
+    [require]
+    GLSL >= 1.20
+    
+    [fragment shader]
+    
+    uniform int SIZE;
+    uniform vec4 lights[512];
+    
+    void main()
+    {
+        float x = 0;
+        for(int i = 0; i < SIZE; i++)
+            x += lights[2*i+1].x;
+    }
+    
+    [test]
+    uniform int SIZE 1
+    draw rect -1 -1 2 2
+    
+    Which gets optimized to:
+    
+    ===== SHADER #12 OPT ================================== PS/BARTS/EVERGREEN =====
+    ===== 42 dw ===== 1 gprs ===== 2 stack =========================================
+    ALU 3 @24
+         1      y: MOV                R0.y,  0
+                t: MULLO_UINT         R0.w,  [0x00000002 2.8026e-45].x, R0.z
+    
+    LOOP_START_DX10 @22
+    PUSH @6
+    ALU 1 @30 KC0[CB0:0-15]
+         2 M    x: PRED_SETGE_INT     __.x,  R0.z, KC0[0].x
+    JUMP @14 POP:1
+    LOOP_BREAK @20
+    POP @14 POP:1
+    ALU 2 @32
+         3      x: ADD_INT            R0.x,  R0.w, [0x00000002 2.8026e-45].x
+    
+    TEX 1 @36
+                   VFETCH             R0.x___, R0.x,   RID:0   MFC:16 UCF:0 FMT[..]
+    ALU 1 @40
+         4      y: ADD                R0.y,  R0.y, R0.x
+    LOOP_END @4
+    EXPORT_DONE        PIXEL 0     R0.____  EOP
+    ===== SHADER_END ===============================================================
+    
+    Notice R0.z being the loop counter/break condition relevant register
+    and being never incremented at all. Also some of the loop content
+    has been moved out of it, to fulfill the requirements for the one unused
+    op.
+    
+    With a debug build of mesa this would produce an error like
+    error at : PRED_SETGE_INT     __, __, EM.2,    R1.x.2||FP@R0.z, C0.x
+      : operand value R1.x.2||FP@R0.z was not previously written to its gpr
+    and the compilation would fail due to this. On a release build it gets
+    passed to the GPU.
+    
+    When using this patch, the loop remains intact:
+    
+    ===== SHADER #12 OPT ================================== PS/BARTS/EVERGREEN =====
+    ===== 48 dw ===== 1 gprs ===== 2 stack =========================================
+    ALU 2 @24
+         1      y: MOV                R0.y,  0
+                z: MOV                R0.z,  0
+    LOOP_START_DX10 @22
+    PUSH @6
+    ALU 1 @28 KC0[CB0:0-15]
+         2 M    x: PRED_SETGE_INT     __.x,  R0.z, KC0[0].x
+    JUMP @14 POP:1
+    LOOP_BREAK @20
+    POP @14 POP:1
+    ALU 4 @30
+         3      t: MULLO_UINT         T0.x,  [0x00000002 2.8026e-45].x, R0.z
+    
+         4      x: ADD_INT            R0.x,  T0.x, [0x00000002 2.8026e-45].x
+    
+    TEX 1 @40
+                   VFETCH             R0.x___, R0.x,   RID:0   MFC:16 UCF:0 FMT[..]
+    ALU 2 @44
+         5      y: ADD                R0.y,  R0.y, R0.x
+                z: ADD_INT            R0.z,  R0.z, 1
+    LOOP_END @4
+    EXPORT_DONE        PIXEL 0     R0.____  EOP
+    ===== SHADER_END ===============================================================
+    
+    Piglit: ./piglit summary console -d results/*_gpu_noglx
+            name: unpatched_gpu_noglx patched_gpu_noglx
+            ----  ------------------- -----------------
+            pass:               18016             18021
+            fail:                 748               743
+            crash:                  7                 7
+            skip:                1124              1124
+            timeout:                0                 0
+            warn:                  13                13
+            incomplete:             0                 0
+            dmesg-warn:             0                 0
+            dmesg-fail:             0                 0
+            changes:                0                 5
+            fixes:                  0                 5
+            regressions:            0                 0
+            total:              19908             19908
+    
+    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94900
+    Tested-by: Heiko Przybyl <lil_tux@web.de>
+    Tested-on: Barts PRO HD6850
+    Signed-off-by: Heiko Przybyl <lil_tux@web.de>
+    Signed-off-by: Marek Olšák <marek.olsak@amd.com>
+
+diff --git a/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp b/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp
+index 79aef91..abae2bf 100644
+--- a/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp
++++ b/src/gallium/drivers/r600/sb/sb_dce_cleanup.cpp
+@@ -30,6 +30,18 @@
+ 
+ namespace r600_sb {
+ 
++int dce_cleanup::run() {
++	int r;
++
++	// Run cleanup for as long as there are unused nodes.
++	do {
++		nodes_changed = false;
++		r = vpass::run();
++	} while (r == 0 && nodes_changed);
++
++	return r;
++}
++
+ bool dce_cleanup::visit(node& n, bool enter) {
+ 	if (enter) {
+ 	} else {
+@@ -110,7 +122,18 @@ bool dce_cleanup::visit(region_node& n, bool enter) {
+ void dce_cleanup::cleanup_dst(node& n) {
+ 	if (!cleanup_dst_vec(n.dst) && remove_unused &&
+ 			!n.dst.empty() && !(n.flags & NF_DONT_KILL) && n.parent)
++	{
++		// Delete use references to the removed node from the src values.
++		for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) {
++			value* v = *I;
++			if (v && v->def && v->uses.size())
++			{
++				v->remove_use(&n);
++			}
++		}
+ 		n.remove();
++		nodes_changed = true;
++	}
+ }
+ 
+ bool dce_cleanup::visit(container_node& n, bool enter) {
+@@ -130,7 +153,7 @@ bool dce_cleanup::cleanup_dst_vec(vvec& vv) {
+ 		if (v->gvn_source && v->gvn_source->is_dead())
+ 			v->gvn_source = NULL;
+ 
+-		if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses))
++		if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses.size()))
+ 			v = NULL;
+ 		else
+ 			alive = true;
+diff --git a/src/gallium/drivers/r600/sb/sb_gcm.cpp b/src/gallium/drivers/r600/sb/sb_gcm.cpp
+index 236b2ea..9c75389 100644
+--- a/src/gallium/drivers/r600/sb/sb_gcm.cpp
++++ b/src/gallium/drivers/r600/sb/sb_gcm.cpp
+@@ -199,10 +199,9 @@ void gcm::td_release_val(value *v) {
+ 		sblog << "\n";
+ 	);
+ 
+-	use_info *u = v->uses;
+-	while (u) {
++	for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
++		use_info *u = *I;
+ 		if (u->op->parent != &pending) {
+-			u = u->next;
+ 			continue;
+ 		}
+ 
+@@ -212,6 +211,7 @@ void gcm::td_release_val(value *v) {
+ 			sblog << "\n";
+ 		);
+ 
++		assert(uses[u->op] > 0);
+ 		if (--uses[u->op] == 0) {
+ 			GCM_DUMP(
+ 				sblog << "td        released : ";
+@@ -222,7 +222,6 @@ void gcm::td_release_val(value *v) {
+ 			pending.remove_node(u->op);
+ 			ready.push_back(u->op);
+ 		}
+-		u = u->next;
+ 	}
+ 
+ }
+diff --git a/src/gallium/drivers/r600/sb/sb_ir.cpp b/src/gallium/drivers/r600/sb/sb_ir.cpp
+index 5226893..d989dce 100644
+--- a/src/gallium/drivers/r600/sb/sb_ir.cpp
++++ b/src/gallium/drivers/r600/sb/sb_ir.cpp
+@@ -255,7 +255,7 @@ void container_node::expand() {
+ void node::remove() {parent->remove_node(this);
+ }
+ 
+-value_hash node::hash_src() {
++value_hash node::hash_src() const {
+ 
+ 	value_hash h = 12345;
+ 
+@@ -269,7 +269,7 @@ value_hash node::hash_src() {
+ }
+ 
+ 
+-value_hash node::hash() {
++value_hash node::hash() const {
+ 
+ 	if (parent && parent->subtype == NST_LOOP_PHI_CONTAINER)
+ 		return 47451;
+diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h
+index 4fc4da2..74c0549 100644
+--- a/src/gallium/drivers/r600/sb/sb_ir.h
++++ b/src/gallium/drivers/r600/sb/sb_ir.h
+@@ -446,15 +446,16 @@ enum use_kind {
+ };
+ 
+ struct use_info {
+-	use_info *next;
+ 	node *op;
+ 	use_kind kind;
+ 	int arg;
+ 
+-	use_info(node *n, use_kind kind, int arg, use_info* next)
+-		: next(next), op(n), kind(kind), arg(arg) {}
++	use_info(node *n, use_kind kind, int arg)
++		: op(n), kind(kind), arg(arg) {}
+ };
+ 
++typedef std::list< use_info * > uselist;
++
+ enum constraint_kind {
+ 	CK_SAME_REG,
+ 	CK_PACKED_BS,
+@@ -498,7 +499,7 @@ public:
+ 	value_hash ghash;
+ 
+ 	node *def, *adef;
+-	use_info *uses;
++	uselist uses;
+ 
+ 	ra_constraint *constraint;
+ 	ra_chunk *chunk;
+@@ -585,6 +586,7 @@ public:
+ 	}
+ 
+ 	void add_use(node *n, use_kind kind, int arg);
++	void remove_use(const node *n);
+ 
+ 	value_hash hash();
+ 	value_hash rel_hash();
+@@ -790,8 +792,8 @@ public:
+ 	void replace_with(node *n);
+ 	void remove();
+ 
+-	virtual value_hash hash();
+-	value_hash hash_src();
++	virtual value_hash hash() const;
++	value_hash hash_src() const;
+ 
+ 	virtual bool fold_dispatch(expr_handler *ex);
+ 
+diff --git a/src/gallium/drivers/r600/sb/sb_pass.h b/src/gallium/drivers/r600/sb/sb_pass.h
+index 0346df1..e878f8c 100644
+--- a/src/gallium/drivers/r600/sb/sb_pass.h
++++ b/src/gallium/drivers/r600/sb/sb_pass.h
+@@ -124,7 +124,9 @@ class dce_cleanup : public vpass {
+ public:
+ 
+ 	dce_cleanup(shader &s) : vpass(s),
+-		remove_unused(s.dce_flags & DF_REMOVE_UNUSED) {}
++		remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
++
++	virtual int run();
+ 
+ 	virtual bool visit(node &n, bool enter);
+ 	virtual bool visit(alu_group_node &n, bool enter);
+@@ -140,6 +142,8 @@ private:
+ 	void cleanup_dst(node &n);
+ 	bool cleanup_dst_vec(vvec &vv);
+ 
++	// Did we alter/remove nodes during a single pass?
++	bool nodes_changed;
+ };
+ 
+ 
+diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp b/src/gallium/drivers/r600/sb/sb_valtable.cpp
+index eb242b1..a8b7b49 100644
+--- a/src/gallium/drivers/r600/sb/sb_valtable.cpp
++++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp
+@@ -220,17 +220,33 @@ void value::add_use(node* n, use_kind kind, int arg) {
+ 	dump::dump_op(n);
+ 	sblog << "     kind " << kind << "    arg " << arg << "\n";
+ 	}
+-	uses = new use_info(n, kind, arg, uses);
++	uses.push_back(new use_info(n, kind, arg));
+ }
+ 
+-unsigned value::use_count() {
+-	use_info *u = uses;
+-	unsigned c = 0;
+-	while (u) {
+-		++c;
+-		u = u->next;
++struct use_node_comp {
++	explicit use_node_comp(const node *n) : n(n) {}
++	bool operator() (const use_info *u) {
++		return u->op->hash() == n->hash();
++	}
++
++	private:
++		const node *n;
++};
++
++void value::remove_use(const node *n) {
++	uselist::iterator it =
++		std::find_if(uses.begin(), uses.end(), use_node_comp(n));
++
++	if (it != uses.end())
++	{
++		// TODO assert((*it)->kind == kind) ?
++		// TODO assert((*it)->arg == arg) ?
++		uses.erase(it);
+ 	}
+-	return c;
++}
++
++unsigned value::use_count() {
++	return uses.size();
+ }
+ 
+ bool value::is_global() {
+@@ -274,13 +290,7 @@ bool value::is_prealloc() {
+ }
+ 
+ void value::delete_uses() {
+-	use_info *u, *c = uses;
+-	while (c) {
+-		u = c->next;
+-		delete c;
+-		c = u;
+-	}
+-	uses = NULL;
++	uses.erase(uses.begin(), uses.end());
+ }
+ 
+ void ra_constraint::update_values() {
+@@ -468,7 +478,7 @@ bool r600_sb::sb_value_set::add_vec(vvec& vv) {
+ bool r600_sb::sb_value_set::contains(value* v) {
+ 	unsigned b = v->uid - 1;
+ 	if (b < bs.size())
+-		return bs.get(v->uid - 1);
++		return bs.get(b);
+ 	else
+ 		return false;
+ }
diff -Nru mesa-13.0.2/debian/patches/series mesa-13.0.3/debian/patches/series
--- mesa-13.0.2/debian/patches/series	2017-01-14 01:32:35.000000000 +0000
+++ mesa-13.0.3/debian/patches/series	2017-01-14 01:32:36.000000000 +0000
@@ -1,9 +1,18 @@
 07_gallium-fix-build-failure-on-powerpcspe.diff
 workaround-binutils-mips-844357.diff
-
-
+vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff
+r600-sb-Fix-loop-optimization-related-hangs-on-eg.diff
 
 
 # Ubuntu patches.
 egl-platform-mir.patch
 i915-dont-default-to-2.1.patch
+
+0001-intel-blorp_blit-Create-structure-for-src-dst-coordi.patch
+0002-intel-blorp_blit-Split-blorp-blits-if-they-are-too-l.patch
+0003-intel-blorp_blit-Adjust-blorp-surface-parameters-for.patch
+0004-intel-blorp-Always-use-UINT-formats-on-SKL.patch
+0005-intel-blorp_blit-Move-RGB-R-conversion-to-follow-bli.patch
+0006-intel-blorp_blit-Enable-splitting-large-blorp-blits.patch
+0007-intel-blorp_blit-Add-split_blorp_blit_debug-switch.patch
+0008-i965-Increase-max-texture-to-16k-for-gen7.patch
diff -Nru mesa-13.0.2/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff mesa-13.0.3/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff
--- mesa-13.0.2/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/patches/vl-zscan-fix-Fix-trivial-sign-compare-warnings.diff	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1,28 @@
+commit ac57bcda1e0e6dcfa81e24468d5b682686120649
+Author: Christian König <christian.koenig@amd.com>
+Date:   Wed Dec 14 15:03:35 2016 +0100
+
+    vl/zscan: fix "Fix trivial sign compare warnings"
+    
+    The variable actually needs to be signed, otherwise converting it to a
+    float doesn't work as expected.
+    
+    Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=98914
+    Signed-off-by: Christian König <christian.koenig@amd.com>
+    Reviewed-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
+    Cc: "13.0" <mesa-stable@lists.freedesktop.org>
+    Fixes: 1fb4179f927 ("vl: Fix trivial sign compare warnings")
+
+diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
+index ef05af4..24d6452 100644
+--- a/src/gallium/auxiliary/vl/vl_zscan.c
++++ b/src/gallium/auxiliary/vl/vl_zscan.c
+@@ -152,7 +152,7 @@ create_vert_shader(struct vl_zscan *zscan)
+    for (i = 0; i < zscan->num_channels; ++i) {
+       ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
+                ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH)
+-                * (i - (signed)zscan->num_channels / 2)));
++                * ((signed)i - (signed)zscan->num_channels / 2)));
+ 
+       ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
+                ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
diff -Nru mesa-13.0.2/debian/rules mesa-13.0.3/debian/rules
--- mesa-13.0.2/debian/rules	2017-01-14 01:32:35.000000000 +0000
+++ mesa-13.0.3/debian/rules	2017-01-14 01:32:36.000000000 +0000
@@ -8,18 +8,11 @@
 DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
 DEB_HOST_ARCH      ?= $(shell dpkg-architecture -qDEB_HOST_ARCH)
 DEB_HOST_ARCH_OS   ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_OS)
-DEB_HOST_GNU_TYPE  ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)
-DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)
 DEB_HOST_ARCH_CPU  ?= $(shell dpkg-architecture -qDEB_HOST_ARCH_CPU)
-ifeq ($(DEB_BUILD_GNU_TYPE), $(DEB_HOST_GNU_TYPE))
-	confflags += --build=$(DEB_HOST_GNU_TYPE)
-else
-	confflags += --build=$(DEB_BUILD_GNU_TYPE) --host=$(DEB_HOST_GNU_TYPE)
-endif
 
 ifeq (,$(filter $(DEB_HOST_ARCH), armhf))
 buildflags = \
-	$(shell DEB_CFLAGS_MAINT_APPEND="-Wall -O2" DEB_CXXFLAGS_MAINT_APPEND="-Wall -O2" dpkg-buildflags --export=configure)
+	$(shell DEB_CFLAGS_MAINT_APPEND=-Wall DEB_CXXFLAGS_MAINT_APPEND=-Wall dpkg-buildflags --export=configure)
 else
 # Workaround for a variant of LP: #725126
 buildflags = \
@@ -114,6 +107,12 @@
 	confflags_GALLIUM += --enable-lmsensors
 endif
 
+# Disable assembly usage on x32 otherwise Mesa defaults to x86_64 assembly
+# which doesn't work on x32 (see #758094)
+ifneq (,$(filter $(DEB_HOST_ARCH), x32))
+	confflags += --disable-asm
+endif
+
 confflags_EGL = --with-egl-platforms="$(EGL_DISPLAYS)"
 confflags_GLES = --enable-gles1 --enable-gles2
 confflags_GALLIUM += --with-gallium-drivers="$(GALLIUM_DRIVERS)"
@@ -153,7 +152,6 @@
 
 override_dh_auto_configure:
 	dh_auto_configure -- \
-		--disable-silent-rules \
 		$(confflags)
 
 override_dh_auto_install:
diff -Nru mesa-13.0.2/debian/source/format mesa-13.0.3/debian/source/format
--- mesa-13.0.2/debian/source/format	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/debian/source/format	2017-01-14 01:32:36.000000000 +0000
@@ -0,0 +1 @@
+1.0
diff -Nru mesa-13.0.2/docs/relnotes/13.0.2.html mesa-13.0.3/docs/relnotes/13.0.2.html
--- mesa-13.0.2/docs/relnotes/13.0.2.html	2016-11-28 15:05:25.000000000 +0000
+++ mesa-13.0.3/docs/relnotes/13.0.2.html	2016-12-12 23:51:29.000000000 +0000
@@ -31,7 +31,8 @@
 
 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+6014233a5db6032ab8de4881384871bbe029de684502707794ce7b3e6beec308  mesa-13.0.2.tar.gz
+a6ed622645f4ed61da418bf65adde5bcc4bb79023c36ba7d6b45b389da4416d5  mesa-13.0.2.tar.xz
 </pre>
 
 
diff -Nru mesa-13.0.2/docs/relnotes/13.0.3.html mesa-13.0.3/docs/relnotes/13.0.3.html
--- mesa-13.0.2/docs/relnotes/13.0.3.html	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/docs/relnotes/13.0.3.html	2017-01-05 15:50:24.000000000 +0000
@@ -0,0 +1,176 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.3 Release Notes / January 5, 2017</h1>
+
+<p>
+Mesa 13.0.3 is a bug fix release which fixes bugs found since the 13.0.2 release.
+</p>
+<p>
+Mesa 13.0.3 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99038">Bug 99038</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>i965/mt: Disable aux surfaces after making miptree shareable</li>
+  <li>egl: Fix crashes in eglCreate*Surface()</li>
+</ul>
+
+<p>Dave Airlie (4):</p>
+<ul>
+  <li>anv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: fix another regression since shadow fixes.</li>
+  <li>radv: add missing license file to radv_meta_bufimage.</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.2</li>
+  <li>anv: don't double-close the same fd</li>
+  <li>anv: don't leak memory if anv_init_wsi() fails</li>
+  <li>radv: don't leak the fd if radv_physical_device_init() succeeds</li>
+  <li>Update version to 13.0.3</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: In a loop break/continue, jump if everyone has taken the path.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (3):</p>
+<ul>
+  <li>anv: Add missing error-checking to anv_block_pool_init (v2)</li>
+  <li>anv: Update the teardown in reverse order of the anv_CreateDevice</li>
+  <li>vulkan/wsi: Fix resource leak in success path of wsi_queue_init()</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>compiler/glsl: fix precision problem of tanh</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>mesa: only verify that enabled arrays have backing buffers</li>
+</ul>
+
+<p>Jason Ekstrand (8):</p>
+<ul>
+  <li>anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push constants are dirty</li>
+  <li>anv/image: Rename hiz_surface to aux_surface</li>
+  <li>anv/cmd_buffer: Remove the 1-D case from the HiZ QPitch calculation</li>
+  <li>genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode</li>
+  <li>anv/device: Return the right error for failed maps</li>
+  <li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
+  <li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
+  <li>spirv: Use a simpler and more correct implementaiton of tanh()</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Allocate at least some URB space even when max_vertices = 0.</li>
+</ul>
+
+<p>Marek Olšák (17):</p>
+<ul>
+  <li>radeonsi: always set all blend registers</li>
+  <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+  <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+  <li>radeonsi: consolidate max-work-group-size computation</li>
+  <li>radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips</li>
+  <li>radeonsi: apply a TC L1 write corruption workaround for SI</li>
+  <li>radeonsi: apply a tessellation bug workaround for SI</li>
+  <li>radeonsi: add a tess+GS hang workaround for VI dGPUs</li>
+  <li>radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well</li>
+  <li>cso: don't release sampler states that are bound</li>
+  <li>radeonsi: always restore sampler states when unbinding sampler views</li>
+  <li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
+  <li>radeonsi: allow specifying simm16 of emit_waitcnt at call sites</li>
+  <li>radeonsi: wait for outstanding memory instructions in TCS barriers</li>
+  <li>tgsi: fix the src type of TGSI_OPCODE_MEMBAR</li>
+  <li>radeonsi: wait for outstanding LDS instructions in memory barriers if needed</li>
+  <li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
+  <li>i965/fs: Add unit tests for copy propagation pass.</li>
+  <li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix isolines tess factor writes to control ring</li>
+  <li>radeonsi: update all GSVS ring descriptors for new buffer allocations</li>
+  <li>radeonsi: do not kill GS with memory writes</li>
+  <li>radeonsi: fix an off-by-one error in the bounds check for max_vertices</li>
+</ul>
+
+<p>Rhys Kidd (1):</p>
+<ul>
+  <li>glsl: Add pthread libs to cache_test</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>mesa: fix active subroutine uniforms properly</li>
+  <li>Revert "nir: Turn imov/fmov of undef into undef."</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff -Nru mesa-13.0.2/src/amd/common/ac_nir_to_llvm.c mesa-13.0.3/src/amd/common/ac_nir_to_llvm.c
--- mesa-13.0.2/src/amd/common/ac_nir_to_llvm.c	2016-11-28 12:56:18.000000000 +0000
+++ mesa-13.0.3/src/amd/common/ac_nir_to_llvm.c	2016-12-14 19:03:11.000000000 +0000
@@ -3545,7 +3545,7 @@
 
 	if (instr->op == nir_texop_query_levels)
 		result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
-	else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod)
+	else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
 		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
 	else if (instr->op == nir_texop_txs &&
 		 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
diff -Nru mesa-13.0.2/src/amd/vulkan/radv_device.c mesa-13.0.3/src/amd/vulkan/radv_device.c
--- mesa-13.0.2/src/amd/vulkan/radv_device.c	2016-11-28 12:56:17.000000000 +0000
+++ mesa-13.0.3/src/amd/vulkan/radv_device.c	2016-12-14 19:03:10.000000000 +0000
@@ -91,6 +91,7 @@
 
 	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
 	device->name = device->rad_info.name;
+	close(fd);
 	return VK_SUCCESS;
 
 fail:
@@ -424,7 +425,7 @@
 		.maxGeometryTotalOutputComponents         = 1024,
 		.maxFragmentInputComponents               = 128,
 		.maxFragmentOutputAttachments             = 8,
-		.maxFragmentDualSrcAttachments            = 2,
+		.maxFragmentDualSrcAttachments            = 1,
 		.maxFragmentCombinedOutputResources       = 8,
 		.maxComputeSharedMemorySize               = 32768,
 		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
diff -Nru mesa-13.0.2/src/amd/vulkan/radv_meta_bufimage.c mesa-13.0.3/src/amd/vulkan/radv_meta_bufimage.c
--- mesa-13.0.2/src/amd/vulkan/radv_meta_bufimage.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/amd/vulkan/radv_meta_bufimage.c	2016-12-16 14:03:24.000000000 +0000
@@ -1,6 +1,33 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
 #include "radv_meta.h"
 #include "nir/nir_builder.h"
 
+/*
+ * Compute shader implementation of image->buffer copy.
+ */
+
 static nir_shader *
 build_nir_itob_compute_shader(struct radv_device *dev)
 {
diff -Nru mesa-13.0.2/src/amd/vulkan/radv_timestamp.h mesa-13.0.3/src/amd/vulkan/radv_timestamp.h
--- mesa-13.0.2/src/amd/vulkan/radv_timestamp.h	2016-11-28 15:16:44.000000000 +0000
+++ mesa-13.0.3/src/amd/vulkan/radv_timestamp.h	2017-01-05 15:53:05.000000000 +0000
@@ -1 +1 @@
-#define RADV_TIMESTAMP "1480346204"
+#define RADV_TIMESTAMP "1483631585"
diff -Nru mesa-13.0.2/src/compiler/glsl/builtin_functions.cpp mesa-13.0.3/src/compiler/glsl/builtin_functions.cpp
--- mesa-13.0.2/src/compiler/glsl/builtin_functions.cpp	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/compiler/glsl/builtin_functions.cpp	2016-12-16 14:03:24.000000000 +0000
@@ -3563,9 +3563,17 @@
    ir_variable *x = in_var(type, "x");
    MAKE_SIG(type, v130, 1, x);
 
+   /* Clamp x to [-10, +10] to avoid precision problems.
+    * When x > 10, e^(-x) is so small relative to e^x that it gets flushed to
+    * zero in the computation e^x + e^(-x). The same happens in the other
+    * direction when x < -10.
+    */
+   ir_variable *t = body.make_temp(type, "tmp");
+   body.emit(assign(t, min2(max2(x, imm(-10.0f)), imm(10.0f))));
+
    /* (e^x - e^(-x)) / (e^x + e^(-x)) */
-   body.emit(ret(div(sub(exp(x), exp(neg(x))),
-                     add(exp(x), exp(neg(x))))));
+   body.emit(ret(div(sub(exp(t), exp(neg(t))),
+                     add(exp(t), exp(neg(t))))));
 
    return sig;
 }
diff -Nru mesa-13.0.2/src/compiler/glsl/linker.cpp mesa-13.0.3/src/compiler/glsl/linker.cpp
--- mesa-13.0.2/src/compiler/glsl/linker.cpp	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/compiler/glsl/linker.cpp	2016-12-14 19:03:10.000000000 +0000
@@ -3118,7 +3118,6 @@
          if (!uni)
             continue;
 
-         sh->NumSubroutineUniforms++;
          count = 0;
          if (sh->NumSubroutineFunctions == 0) {
             linker_error(prog, "subroutine uniform %s defined but no valid functions found\n", uni->type->name);
diff -Nru mesa-13.0.2/src/compiler/glsl/link_uniforms.cpp mesa-13.0.3/src/compiler/glsl/link_uniforms.cpp
--- mesa-13.0.2/src/compiler/glsl/link_uniforms.cpp	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/compiler/glsl/link_uniforms.cpp	2016-12-14 19:03:10.000000000 +0000
@@ -633,6 +633,8 @@
          uniform->opaque[shader_type].index = this->next_subroutine;
          uniform->opaque[shader_type].active = true;
 
+         prog->_LinkedShaders[shader_type]->NumSubroutineUniforms++;
+
          /* Increment the subroutine index by 1 for non-arrays and by the
           * number of array elements for arrays.
           */
diff -Nru mesa-13.0.2/src/compiler/glsl/tests/.deps/cache_test.Po mesa-13.0.3/src/compiler/glsl/tests/.deps/cache_test.Po
--- mesa-13.0.2/src/compiler/glsl/tests/.deps/cache_test.Po	2016-11-28 15:16:14.000000000 +0000
+++ mesa-13.0.3/src/compiler/glsl/tests/.deps/cache_test.Po	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-# dummy
diff -Nru mesa-13.0.2/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po mesa-13.0.3/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po
--- mesa-13.0.2/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/src/compiler/glsl/tests/.deps/glsl_tests_cache_test-cache_test.Po	2017-01-05 15:52:34.000000000 +0000
@@ -0,0 +1 @@
+# dummy
diff -Nru mesa-13.0.2/src/compiler/Makefile.glsl.am mesa-13.0.3/src/compiler/Makefile.glsl.am
--- mesa-13.0.2/src/compiler/Makefile.glsl.am	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/compiler/Makefile.glsl.am	2016-12-24 13:03:53.000000000 +0000
@@ -62,8 +62,11 @@
 
 glsl_tests_cache_test_SOURCES =				\
 	glsl/tests/cache_test.c
+glsl_tests_cache_test_CFLAGS =				\
+	$(PTHREAD_CFLAGS)
 glsl_tests_cache_test_LDADD =				\
-	glsl/libglsl.la
+	glsl/libglsl.la					\
+	$(PTHREAD_LIBS)
 
 glsl_tests_general_ir_test_SOURCES =			\
 	glsl/tests/builtin_variable_test.cpp		\
diff -Nru mesa-13.0.2/src/compiler/Makefile.in mesa-13.0.3/src/compiler/Makefile.in
--- mesa-13.0.2/src/compiler/Makefile.in	2016-11-28 15:16:02.000000000 +0000
+++ mesa-13.0.3/src/compiler/Makefile.in	2017-01-05 15:52:22.000000000 +0000
@@ -401,9 +401,15 @@
 am_glsl_tests_blob_test_OBJECTS = glsl/tests/blob_test.$(OBJEXT)
 glsl_tests_blob_test_OBJECTS = $(am_glsl_tests_blob_test_OBJECTS)
 glsl_tests_blob_test_DEPENDENCIES = glsl/libglsl.la
-am_glsl_tests_cache_test_OBJECTS = glsl/tests/cache_test.$(OBJEXT)
+am_glsl_tests_cache_test_OBJECTS =  \
+	glsl/tests/glsl_tests_cache_test-cache_test.$(OBJEXT)
 glsl_tests_cache_test_OBJECTS = $(am_glsl_tests_cache_test_OBJECTS)
-glsl_tests_cache_test_DEPENDENCIES = glsl/libglsl.la
+glsl_tests_cache_test_DEPENDENCIES = glsl/libglsl.la \
+	$(am__DEPENDENCIES_1)
+glsl_tests_cache_test_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
+	$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+	$(glsl_tests_cache_test_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+	$(LDFLAGS) -o $@
 am_glsl_tests_general_ir_test_OBJECTS =  \
 	glsl/tests/builtin_variable_test.$(OBJEXT) \
 	glsl/tests/invalidate_locations_test.$(OBJEXT) \
@@ -1389,8 +1395,12 @@
 glsl_tests_cache_test_SOURCES = \
 	glsl/tests/cache_test.c
 
+glsl_tests_cache_test_CFLAGS = \
+	$(PTHREAD_CFLAGS)
+
 glsl_tests_cache_test_LDADD = \
-	glsl/libglsl.la
+	glsl/libglsl.la					\
+	$(PTHREAD_LIBS)
 
 glsl_tests_general_ir_test_SOURCES = \
 	glsl/tests/builtin_variable_test.cpp		\
@@ -2024,12 +2034,13 @@
 glsl/tests/blob-test$(EXEEXT): $(glsl_tests_blob_test_OBJECTS) $(glsl_tests_blob_test_DEPENDENCIES) $(EXTRA_glsl_tests_blob_test_DEPENDENCIES) glsl/tests/$(am__dirstamp)
 	@rm -f glsl/tests/blob-test$(EXEEXT)
 	$(AM_V_CCLD)$(LINK) $(glsl_tests_blob_test_OBJECTS) $(glsl_tests_blob_test_LDADD) $(LIBS)
-glsl/tests/cache_test.$(OBJEXT): glsl/tests/$(am__dirstamp) \
+glsl/tests/glsl_tests_cache_test-cache_test.$(OBJEXT):  \
+	glsl/tests/$(am__dirstamp) \
 	glsl/tests/$(DEPDIR)/$(am__dirstamp)
 
 glsl/tests/cache-test$(EXEEXT): $(glsl_tests_cache_test_OBJECTS) $(glsl_tests_cache_test_DEPENDENCIES) $(EXTRA_glsl_tests_cache_test_DEPENDENCIES) glsl/tests/$(am__dirstamp)
 	@rm -f glsl/tests/cache-test$(EXEEXT)
-	$(AM_V_CCLD)$(LINK) $(glsl_tests_cache_test_OBJECTS) $(glsl_tests_cache_test_LDADD) $(LIBS)
+	$(AM_V_CCLD)$(glsl_tests_cache_test_LINK) $(glsl_tests_cache_test_OBJECTS) $(glsl_tests_cache_test_LDADD) $(LIBS)
 glsl/tests/builtin_variable_test.$(OBJEXT):  \
 	glsl/tests/$(am__dirstamp) \
 	glsl/tests/$(DEPDIR)/$(am__dirstamp)
@@ -2220,9 +2231,9 @@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/glcpp/$(DEPDIR)/pp.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/blob_test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/builtin_variable_test.Po@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/cache_test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/copy_constant_to_storage_tests.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/general_ir_test.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/invalidate_locations_test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/sampler_types_test.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@glsl/tests/$(DEPDIR)/set_uniform_initializer_tests.Po@am__quote@
@@ -2331,6 +2342,20 @@
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
+glsl/tests/glsl_tests_cache_test-cache_test.o: glsl/tests/cache_test.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -MT glsl/tests/glsl_tests_cache_test-cache_test.o -MD -MP -MF glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo -c -o glsl/tests/glsl_tests_cache_test-cache_test.o `test -f 'glsl/tests/cache_test.c' || echo '$(srcdir)/'`glsl/tests/cache_test.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='glsl/tests/cache_test.c' object='glsl/tests/glsl_tests_cache_test-cache_test.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -c -o glsl/tests/glsl_tests_cache_test-cache_test.o `test -f 'glsl/tests/cache_test.c' || echo '$(srcdir)/'`glsl/tests/cache_test.c
+
+glsl/tests/glsl_tests_cache_test-cache_test.obj: glsl/tests/cache_test.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -MT glsl/tests/glsl_tests_cache_test-cache_test.obj -MD -MP -MF glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo -c -o glsl/tests/glsl_tests_cache_test-cache_test.obj `if test -f 'glsl/tests/cache_test.c'; then $(CYGPATH_W) 'glsl/tests/cache_test.c'; else $(CYGPATH_W) '$(srcdir)/glsl/tests/cache_test.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Tpo glsl/tests/$(DEPDIR)/glsl_tests_cache_test-cache_test.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='glsl/tests/cache_test.c' object='glsl/tests/glsl_tests_cache_test-cache_test.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(glsl_tests_cache_test_CFLAGS) $(CFLAGS) -c -o glsl/tests/glsl_tests_cache_test-cache_test.obj `if test -f 'glsl/tests/cache_test.c'; then $(CYGPATH_W) 'glsl/tests/cache_test.c'; else $(CYGPATH_W) '$(srcdir)/glsl/tests/cache_test.c'; fi`
+
 spirv/spirv2nir-spirv2nir.o: spirv/spirv2nir.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(spirv2nir_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spirv/spirv2nir-spirv2nir.o -MD -MP -MF spirv/$(DEPDIR)/spirv2nir-spirv2nir.Tpo -c -o spirv/spirv2nir-spirv2nir.o `test -f 'spirv/spirv2nir.c' || echo '$(srcdir)/'`spirv/spirv2nir.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) spirv/$(DEPDIR)/spirv2nir-spirv2nir.Tpo spirv/$(DEPDIR)/spirv2nir-spirv2nir.Po
diff -Nru mesa-13.0.2/src/compiler/nir/nir_opt_undef.c mesa-13.0.3/src/compiler/nir/nir_opt_undef.c
--- mesa-13.0.2/src/compiler/nir/nir_opt_undef.c	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/compiler/nir/nir_opt_undef.c	2016-12-16 14:03:24.000000000 +0000
@@ -79,9 +79,7 @@
 {
    if (alu->op != nir_op_vec2 &&
        alu->op != nir_op_vec3 &&
-       alu->op != nir_op_vec4 &&
-       alu->op != nir_op_fmov &&
-       alu->op != nir_op_imov)
+       alu->op != nir_op_vec4)
       return false;
 
    assert(alu->dest.dest.is_ssa);
diff -Nru mesa-13.0.2/src/compiler/spirv/vtn_glsl450.c mesa-13.0.3/src/compiler/spirv/vtn_glsl450.c
--- mesa-13.0.2/src/compiler/spirv/vtn_glsl450.c	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/compiler/spirv/vtn_glsl450.c	2016-12-16 14:03:24.000000000 +0000
@@ -565,16 +565,21 @@
                                    build_exp(nb, nir_fneg(nb, src[0]))));
       return;
 
-   case GLSLstd450Tanh:
-      /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
-      val->ssa->def =
-         nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
-                                   nir_fsub(nb, build_exp(nb, src[0]),
-                                                build_exp(nb, nir_fneg(nb, src[0])))),
-                      nir_fmul(nb, nir_imm_float(nb, 0.5f),
-                                   nir_fadd(nb, build_exp(nb, src[0]),
-                                                build_exp(nb, nir_fneg(nb, src[0])))));
+   case GLSLstd450Tanh: {
+      /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
+       *
+       * With a little algebra this reduces to (e^2x - 1) / (e^2x + 1)
+       *
+       * We clamp x to (-inf, +10] to avoid precision problems.  When x > 10,
+       * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the
+       * computation e^2x +/- 1 so it can be ignored.
+       */
+      nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
+      nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));
+      val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),
+                                   nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));
       return;
+   }
 
    case GLSLstd450Asinh:
       val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
diff -Nru mesa-13.0.2/src/egl/main/eglapi.c mesa-13.0.3/src/egl/main/eglapi.c
--- mesa-13.0.2/src/egl/main/eglapi.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/egl/main/eglapi.c	2016-12-16 14:03:24.000000000 +0000
@@ -849,7 +849,7 @@
       RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE);
 
 #ifdef HAVE_SURFACELESS_PLATFORM
-   if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+   if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
       /* From the EGL_MESA_platform_surfaceless spec (v1):
        *
        *    eglCreatePlatformWindowSurface fails when called with a <display>
@@ -970,7 +970,7 @@
    EGLSurface ret;
 
 #if HAVE_SURFACELESS_PLATFORM
-   if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+   if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
       /* From the EGL_MESA_platform_surfaceless spec (v1):
        *
        *   [Like eglCreatePlatformWindowSurface,] eglCreatePlatformPixmapSurface
diff -Nru mesa-13.0.2/src/gallium/auxiliary/cso_cache/cso_cache.c mesa-13.0.3/src/gallium/auxiliary/cso_cache/cso_cache.c
--- mesa-13.0.2/src/gallium/auxiliary/cso_cache/cso_cache.c	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/gallium/auxiliary/cso_cache/cso_cache.c	2016-12-14 19:03:12.000000000 +0000
@@ -188,7 +188,9 @@
                  void *state)
 {
    struct cso_hash *hash = _cso_hash_for_type(sc, type);
-   sanitize_hash(sc, hash, type, sc->max_size);
+
+   if (type != CSO_SAMPLER)
+      sanitize_hash(sc, hash, type, sc->max_size);
 
    return cso_hash_insert(hash, hash_key, state);
 }
diff -Nru mesa-13.0.2/src/gallium/auxiliary/tgsi/tgsi_info.c mesa-13.0.3/src/gallium/auxiliary/tgsi/tgsi_info.c
--- mesa-13.0.2/src/gallium/auxiliary/tgsi/tgsi_info.c	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/gallium/auxiliary/tgsi/tgsi_info.c	2016-12-14 19:03:12.000000000 +0000
@@ -485,6 +485,7 @@
    case TGSI_OPCODE_UMUL_HI:
    case TGSI_OPCODE_UP2H:
    case TGSI_OPCODE_U2I64:
+   case TGSI_OPCODE_MEMBAR:
       return TGSI_TYPE_UNSIGNED;
    case TGSI_OPCODE_IMUL_HI:
    case TGSI_OPCODE_I2F:
diff -Nru mesa-13.0.2/src/gallium/drivers/radeon/r600_pipe_common.c mesa-13.0.3/src/gallium/drivers/radeon/r600_pipe_common.c
--- mesa-13.0.2/src/gallium/drivers/radeon/r600_pipe_common.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeon/r600_pipe_common.c	2016-12-14 19:03:11.000000000 +0000
@@ -85,7 +85,8 @@
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
-	if (ctx->chip_class == CIK) {
+	if (ctx->chip_class == CIK ||
+	    ctx->chip_class == VI) {
 		/* Two EOP events are required to make all engines go idle
 		 * (and optional cache flushes executed) before the timestamp
 		 * is written.
@@ -114,7 +115,8 @@
 {
 	unsigned dwords = 6;
 
-	if (screen->chip_class == CIK)
+	if (screen->chip_class == CIK ||
+	    screen->chip_class == VI)
 		dwords *= 2;
 
 	if (!screen->info.has_virtual_memory)
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_compute.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_compute.c
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_compute.c	2016-11-24 16:34:41.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_compute.c	2016-12-14 19:03:11.000000000 +0000
@@ -343,6 +343,7 @@
 			lds_blocks += align(program->local_size, 512) >> 9;
 		}
 
+		/* TODO: use si_multiwave_lds_size_workaround */
 		assert(lds_blocks <= 0xFF);
 
 		config->rsrc2 &= C_00B84C_LDS_SIZE;
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_descriptors.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_descriptors.c
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_descriptors.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_descriptors.c	2016-12-14 19:03:12.000000000 +0000
@@ -413,13 +413,13 @@
 	struct si_sampler_views *views = &sctx->samplers[shader].views;
 	struct si_sampler_view *rview = (struct si_sampler_view*)view;
 	struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
+	uint32_t *desc = descs->list + slot * 16;
 
 	if (views->views[slot] == view && !disallow_early_out)
 		return;
 
 	if (view) {
 		struct r600_texture *rtex = (struct r600_texture *)view->texture;
-		uint32_t *desc = descs->list + slot * 16;
 
 		assert(rtex); /* views with texture == NULL aren't supported */
 		pipe_sampler_view_reference(&views->views[slot], view);
@@ -468,9 +468,14 @@
 					   rview->is_stencil_sampler, true);
 	} else {
 		pipe_sampler_view_reference(&views->views[slot], NULL);
-		memcpy(descs->list + slot*16, null_texture_descriptor, 8*4);
+		memcpy(desc, null_texture_descriptor, 8*4);
 		/* Only clear the lower dwords of FMASK. */
-		memcpy(descs->list + slot*16 + 8, null_texture_descriptor, 4*4);
+		memcpy(desc + 8, null_texture_descriptor, 4*4);
+		/* Re-set the sampler state if we are transitioning from FMASK. */
+		if (views->sampler_states[slot])
+			memcpy(desc + 12,
+			       views->sampler_states[slot], 4*4);
+
 		views->enabled_mask &= ~(1u << slot);
 	}
 
@@ -803,10 +808,10 @@
 		/* If FMASK is bound, don't overwrite it.
 		 * The sampler state will be set after FMASK is unbound.
 		 */
-		if (samplers->views.views[i] &&
-		    samplers->views.views[i]->texture &&
-		    samplers->views.views[i]->texture->target != PIPE_BUFFER &&
-		    ((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size)
+		if (samplers->views.views[slot] &&
+		    samplers->views.views[slot]->texture &&
+		    samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
+		    ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
 			continue;
 
 		memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_pipe.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_pipe.c
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_pipe.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_pipe.c	2016-12-14 19:03:12.000000000 +0000
@@ -187,7 +187,10 @@
 
 	/* SI + AMDGPU + CE = GPU hang */
 	if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib &&
-	    sscreen->b.chip_class != SI) {
+	    sscreen->b.chip_class != SI &&
+	    /* These can't use CE due to a power gating bug in the kernel. */
+	    sscreen->b.family != CHIP_CARRIZO &&
+	    sscreen->b.family != CHIP_STONEY) {
 		sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
 		if (!sctx->ce_ib)
 			goto fail;
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.c
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.c	2016-12-16 14:03:24.000000000 +0000
@@ -2577,10 +2577,18 @@
 				 lp_build_const_int32(gallivm,
 						      tess_outer_index * 4), "");
 
-	for (i = 0; i < outer_comps; i++)
-		out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
-	for (i = 0; i < inner_comps; i++)
-		out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+	if (shader->key.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
+		/* For isolines, the hardware expects tess factors in the
+		 * reverse order from what GLSL / TGSI specify.
+		 */
+		out[0] = lds_load(bld_base, TGSI_TYPE_SIGNED, 1, lds_outer);
+		out[1] = lds_load(bld_base, TGSI_TYPE_SIGNED, 0, lds_outer);
+	} else {
+		for (i = 0; i < outer_comps; i++)
+			out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+		for (i = 0; i < inner_comps; i++)
+			out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+	}
 
 	/* Convert the outputs to vectors for stores. */
 	vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
@@ -3301,6 +3309,7 @@
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
  */
+#if 0 /* unused currently */
 static void emit_optimization_barrier(struct si_shader_context *ctx)
 {
 	LLVMBuilderRef builder = ctx->gallivm.builder;
@@ -3308,13 +3317,19 @@
 	LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
 	LLVMBuildCall(builder, inlineasm, NULL, 0, "");
 }
+#endif
 
-static void emit_waitcnt(struct si_shader_context *ctx)
+/* Combine these with & instead of |. */
+#define NOOP_WAITCNT 0xf7f
+#define LGKM_CNT 0x07f
+#define VM_CNT 0xf70
+
+static void emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
 {
 	struct gallivm_state *gallivm = &ctx->gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMValueRef args[1] = {
-		lp_build_const_int32(gallivm, 0xf70)
+		lp_build_const_int32(gallivm, simm16)
 	};
 	lp_build_intrinsic(builder, "llvm.amdgcn.s.waitcnt",
 			   ctx->voidt, args, 1, 0);
@@ -3326,8 +3341,23 @@
 		struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
+	LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
+	unsigned flags = LLVMConstIntGetZExtValue(src0);
+	unsigned waitcnt = NOOP_WAITCNT;
+
+	if (flags & TGSI_MEMBAR_THREAD_GROUP)
+		waitcnt &= VM_CNT & LGKM_CNT;
+
+	if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER |
+		     TGSI_MEMBAR_SHADER_BUFFER |
+		     TGSI_MEMBAR_SHADER_IMAGE))
+		waitcnt &= VM_CNT;
+
+	if (flags & TGSI_MEMBAR_SHARED)
+		waitcnt &= LGKM_CNT;
 
-	emit_waitcnt(ctx);
+	if (waitcnt != NOOP_WAITCNT)
+		emit_waitcnt(ctx, waitcnt);
 }
 
 static LLVMValueRef
@@ -3481,7 +3511,8 @@
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data * emit_data,
 		unsigned target,
-		bool atomic)
+		bool atomic,
+		bool force_glc)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
@@ -3489,6 +3520,7 @@
 	LLVMValueRef r128 = i1false;
 	LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
 	LLVMValueRef glc =
+		force_glc ||
 		inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
 		i1true : i1false;
 	LLVMValueRef slc = i1false;
@@ -3543,7 +3575,8 @@
 		LLVMValueRef rsrc,
 		LLVMValueRef index,
 		LLVMValueRef offset,
-		bool atomic)
+		bool atomic,
+		bool force_glc)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
@@ -3554,6 +3587,7 @@
 	emit_data->args[emit_data->arg_count++] = offset; /* voffset */
 	if (!atomic) {
 		emit_data->args[emit_data->arg_count++] =
+			force_glc ||
 			inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
 			i1true : i1false; /* glc */
 	}
@@ -3583,7 +3617,7 @@
 		offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
 
 		buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
-				   offset, false);
+				   offset, false, false);
 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
 		LLVMValueRef coords;
 
@@ -3593,14 +3627,14 @@
 		if (target == TGSI_TEXTURE_BUFFER) {
 			rsrc = extract_rsrc_top_half(ctx, rsrc);
 			buffer_append_args(ctx, emit_data, rsrc, coords,
-					bld_base->uint_bld.zero, false);
+					   bld_base->uint_bld.zero, false, false);
 		} else {
 			emit_data->args[0] = coords;
 			emit_data->args[1] = rsrc;
 			emit_data->args[2] = lp_build_const_int32(gallivm, 15); /* dmask */
 			emit_data->arg_count = 3;
 
-			image_append_args(ctx, emit_data, target, false);
+			image_append_args(ctx, emit_data, target, false, false);
 		}
 	}
 }
@@ -3727,7 +3761,7 @@
 	}
 
 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-		emit_waitcnt(ctx);
+		emit_waitcnt(ctx, VM_CNT);
 
 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
 		load_emit_buffer(ctx, emit_data);
@@ -3790,11 +3824,19 @@
 		offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
 
 		buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
-				   offset, false);
+				   offset, false, false);
 	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
 		unsigned target = inst->Memory.Texture;
 		LLVMValueRef coords;
 
+		/* 8bit/16bit TC L1 write corruption bug on SI.
+		 * All store opcodes not aligned to a dword are affected.
+		 *
+		 * The only way to get unaligned stores in radeonsi is through
+		 * shader images.
+		 */
+		bool force_glc = ctx->screen->b.chip_class == SI;
+
 		coords = image_fetch_coords(bld_base, inst, 0);
 
 		if (target == TGSI_TEXTURE_BUFFER) {
@@ -3802,14 +3844,14 @@
 
 			rsrc = extract_rsrc_top_half(ctx, rsrc);
 			buffer_append_args(ctx, emit_data, rsrc, coords,
-					bld_base->uint_bld.zero, false);
+					   bld_base->uint_bld.zero, false, force_glc);
 		} else {
 			emit_data->args[1] = coords;
 			image_fetch_rsrc(bld_base, &memory, true, &emit_data->args[2]);
 			emit_data->args[3] = lp_build_const_int32(gallivm, 15); /* dmask */
 			emit_data->arg_count = 4;
 
-			image_append_args(ctx, emit_data, target, false);
+			image_append_args(ctx, emit_data, target, false, force_glc);
 		}
 	}
 }
@@ -3929,7 +3971,7 @@
 	}
 
 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
-		emit_waitcnt(ctx);
+		emit_waitcnt(ctx, VM_CNT);
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
 		store_emit_buffer(ctx, emit_data);
@@ -3993,7 +4035,7 @@
 		offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
 
 		buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
-				   offset, true);
+				   offset, true, false);
 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
 		unsigned target = inst->Memory.Texture;
 		LLVMValueRef coords;
@@ -4005,12 +4047,12 @@
 		if (target == TGSI_TEXTURE_BUFFER) {
 			rsrc = extract_rsrc_top_half(ctx, rsrc);
 			buffer_append_args(ctx, emit_data, rsrc, coords,
-					   bld_base->uint_bld.zero, true);
+					   bld_base->uint_bld.zero, true, false);
 		} else {
 			emit_data->args[emit_data->arg_count++] = coords;
 			emit_data->args[emit_data->arg_count++] = rsrc;
 
-			image_append_args(ctx, emit_data, target, true);
+			image_append_args(ctx, emit_data, target, true, false);
 		}
 	}
 }
@@ -5247,6 +5289,7 @@
 	struct si_shader *shader = ctx->shader;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	struct lp_build_if_state if_state;
 	LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
 					    SI_PARAM_GS2VS_OFFSET);
 	LLVMValueRef gs_next_vertex;
@@ -5264,19 +5307,28 @@
 				       "");
 
 	/* If this thread has already emitted the declared maximum number of
-	 * vertices, kill it: excessive vertex emissions are not supposed to
-	 * have any effect, and GS threads have no externally observable
-	 * effects other than emitting vertices.
+	 * vertices, skip the write: excessive vertex emissions are not
+	 * supposed to have any effect.
+	 *
+	 * If the shader has no writes to memory, kill it instead. This skips
+	 * further memory loads and may allow LLVM to skip to the end
+	 * altogether.
 	 */
-	can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
+	can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULT, gs_next_vertex,
 				 lp_build_const_int32(gallivm,
 						      shader->selector->gs_max_out_vertices), "");
-	kill = lp_build_select(&bld_base->base, can_emit,
-			       lp_build_const_float(gallivm, 1.0f),
-			       lp_build_const_float(gallivm, -1.0f));
 
-	lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
-			   ctx->voidt, &kill, 1, 0);
+	bool use_kill = !info->writes_memory;
+	if (use_kill) {
+		kill = lp_build_select(&bld_base->base, can_emit,
+				       lp_build_const_float(gallivm, 1.0f),
+				       lp_build_const_float(gallivm, -1.0f));
+
+		lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+				   ctx->voidt, &kill, 1, 0);
+	} else {
+		lp_build_if(&if_state, gallivm, can_emit);
+	}
 
 	for (i = 0; i < info->num_outputs; i++) {
 		LLVMValueRef *out_ptr =
@@ -5302,6 +5354,7 @@
 					    1, 0, 1, 1, 0);
 		}
 	}
+
 	gs_next_vertex = lp_build_add(uint, gs_next_vertex,
 				      lp_build_const_int32(gallivm, 1));
 
@@ -5312,6 +5365,9 @@
 	args[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_GS_WAVE_ID);
 	lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
 			   ctx->voidt, args, 2, 0);
+
+	if (!use_kill)
+		lp_build_endif(&if_state);
 }
 
 /* Cut one primitive from the geometry shader */
@@ -5344,7 +5400,7 @@
 	 * always fits into a single wave.
 	 */
 	if (ctx->type == PIPE_SHADER_TESS_CTRL) {
-		emit_optimization_barrier(ctx);
+		emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
 		return;
 	}
 
@@ -5481,6 +5537,23 @@
 		"tess_lds");
 }
 
+static unsigned si_get_max_workgroup_size(struct si_shader *shader)
+{
+	const unsigned *properties = shader->selector->info.properties;
+	unsigned max_work_group_size =
+	               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
+	               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
+	               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
+
+	if (!max_work_group_size) {
+		/* This is a variable group size compute shader,
+		 * compile it for the maximum possible group size.
+		 */
+		max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+	}
+	return max_work_group_size;
+}
+
 static void create_function(struct si_shader_context *ctx)
 {
 	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
@@ -5706,22 +5779,9 @@
 				      S_0286D0_FRONT_FACE_ENA(1) |
 				      S_0286D0_POS_FIXED_PT_ENA(1));
 	} else if (ctx->type == PIPE_SHADER_COMPUTE) {
-		const unsigned *properties = shader->selector->info.properties;
-		unsigned max_work_group_size =
-		               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
-		               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
-		               properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
-
-		if (!max_work_group_size) {
-			/* This is a variable group size compute shader,
-			 * compile it for the maximum possible group size.
-			 */
-			max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-		}
-
 		si_llvm_add_attribute(ctx->main_fn,
 				      "amdgpu-max-work-group-size",
-				      max_work_group_size);
+				      si_get_max_workgroup_size(shader));
 	}
 
 	shader->info.num_input_sgprs = 0;
@@ -6643,20 +6703,11 @@
 	 * LLVM 3.9svn has this bug.
 	 */
 	if (sel->type == PIPE_SHADER_COMPUTE) {
-		unsigned *props = sel->info.properties;
 		unsigned wave_size = 64;
 		unsigned max_vgprs = 256;
 		unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
 		unsigned max_sgprs_per_wave = 128;
-		unsigned max_block_threads;
-
-		if (props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH])
-			max_block_threads = props[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] *
-					    props[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT] *
-					    props[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
-		else
-			max_block_threads = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-
+		unsigned max_block_threads = si_get_max_workgroup_size(shader);
 		unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
 		unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
 
@@ -7746,11 +7797,31 @@
 	return true;
 }
 
-static void si_fix_num_sgprs(struct si_shader *shader)
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+				      unsigned *lds_size)
+{
+	/* SPI barrier management bug:
+	 *   Make sure we have at least 4k of LDS in use to avoid the bug.
+	 *   It applies to workgroup sizes of more than one wavefront.
+	 */
+	if (sscreen->b.family == CHIP_BONAIRE ||
+	    sscreen->b.family == CHIP_KABINI ||
+	    sscreen->b.family == CHIP_MULLINS)
+		*lds_size = MAX2(*lds_size, 8);
+}
+
+static void si_fix_resource_usage(struct si_screen *sscreen,
+				  struct si_shader *shader)
 {
 	unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
 
 	shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+
+	if (shader->selector->type == PIPE_SHADER_COMPUTE &&
+	    si_get_max_workgroup_size(shader) > 64) {
+		si_multiwave_lds_size_workaround(sscreen,
+						 &shader->config.lds_size);
+	}
 }
 
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
@@ -7846,7 +7917,7 @@
 		}
 	}
 
-	si_fix_num_sgprs(shader);
+	si_fix_resource_usage(sscreen, shader);
 	si_shader_dump(sscreen, shader, debug, sel->info.processor,
 		       stderr);
 
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.h mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.h
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_shader.h	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_shader.h	2016-12-14 19:03:11.000000000 +0000
@@ -482,6 +482,8 @@
 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
 		    struct pipe_debug_callback *debug, unsigned processor,
 		    FILE *f);
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+				      unsigned *lds_size);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
 			struct si_shader *shader,
 			struct si_shader_config *config,
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_state.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_state.c
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_state.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_state.c	2016-12-14 19:03:10.000000000 +0000
@@ -453,8 +453,14 @@
 			S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
 
 		/* Only set dual source blending for MRT0 to avoid a hang. */
-		if (i >= 1 && blend->dual_src_blend)
+		if (i >= 1 && blend->dual_src_blend) {
+			/* Vulkan does this for dual source blending. */
+			if (i == 1)
+				blend_cntl |= S_028780_ENABLE(1);
+
+			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
 			continue;
+		}
 
 		/* Only addition and subtraction equations are supported with
 		 * dual source blending.
@@ -463,16 +469,14 @@
 		    (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX ||
 		     eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) {
 			assert(!"Unsupported equation for dual source blending");
+			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
 			continue;
 		}
 
-		if (!state->rt[j].colormask)
-			continue;
-
 		/* cb_render_state will disable unused ones */
 		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
 
-		if (!state->rt[j].blend_enable) {
+		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
 			continue;
 		}
@@ -553,6 +557,17 @@
 	}
 
 	if (sctx->b.family == CHIP_STONEY) {
+		/* Disable RB+ blend optimizations for dual source blending.
+		 * Vulkan does this.
+		 */
+		if (blend->dual_src_blend) {
+			for (int i = 0; i < 8; i++) {
+				sx_mrt_blend_opt[i] =
+					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
+					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+			}
+		}
+
 		for (int i = 0; i < 8; i++)
 			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
 				       sx_mrt_blend_opt[i]);
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_draw.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_draw.c
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_draw.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_draw.c	2016-12-14 19:03:11.000000000 +0000
@@ -154,6 +154,12 @@
 	 */
 	*num_patches = MIN2(*num_patches, 40);
 
+	/* SI bug workaround - limit LS-HS threadgroups to only one wave. */
+	if (sctx->b.chip_class == SI) {
+		unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
+		*num_patches = MIN2(*num_patches, one_wave);
+	}
+
 	output_patch0_offset = input_patch_size * *num_patches;
 	perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
 
@@ -162,11 +168,13 @@
 
 	if (sctx->b.chip_class >= CIK) {
 		assert(lds_size <= 65536);
-		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+		lds_size = align(lds_size, 512) / 512;
 	} else {
 		assert(lds_size <= 32768);
-		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+		lds_size = align(lds_size, 256) / 256;
 	}
+	si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+	ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
 
 	if (sctx->last_ls == ls->current &&
 	    sctx->last_tcs == tcs &&
@@ -284,10 +292,18 @@
 
 		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
 		if (sctx->screen->has_distributed_tess) {
-			if (sctx->gs_shader.cso)
+			if (sctx->gs_shader.cso) {
 				partial_es_wave = true;
-			else
+
+				/* GPU hang workaround. */
+				if (sctx->b.family == CHIP_TONGA ||
+				    sctx->b.family == CHIP_FIJI ||
+				    sctx->b.family == CHIP_POLARIS10 ||
+				    sctx->b.family == CHIP_POLARIS11)
+					partial_vs_wave = true;
+			} else {
 				partial_vs_wave = true;
+			}
 		}
 	}
 
diff -Nru mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_shaders.c mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_shaders.c
--- mesa-13.0.2/src/gallium/drivers/radeonsi/si_state_shaders.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/radeonsi/si_state_shaders.c	2016-12-16 14:03:24.000000000 +0000
@@ -1777,10 +1777,15 @@
 				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
 				   false, false, 0, 0, 0);
 	}
-	if (sctx->gsvs_ring)
+	if (sctx->gsvs_ring) {
 		si_set_ring_buffer(&sctx->b.b, SI_VS_RING_GSVS,
 				   sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
 				   false, false, 0, 0, 0);
+
+		/* Also update SI_GS_RING_GSVSi descriptors. */
+		sctx->last_gsvs_itemsize = 0;
+	}
+
 	return true;
 }
 
diff -Nru mesa-13.0.2/src/gallium/drivers/vc4/vc4_program.c mesa-13.0.3/src/gallium/drivers/vc4/vc4_program.c
--- mesa-13.0.2/src/gallium/drivers/vc4/vc4_program.c	2016-11-23 18:56:30.000000000 +0000
+++ mesa-13.0.3/src/gallium/drivers/vc4/vc4_program.c	2016-12-14 19:03:11.000000000 +0000
@@ -1865,22 +1865,29 @@
 static void
 ntq_emit_jump(struct vc4_compile *c, nir_jump_instr *jump)
 {
+        struct qblock *jump_block;
         switch (jump->type) {
         case nir_jump_break:
-                qir_SF(c, c->execute);
-                qir_MOV_cond(c, QPU_COND_ZS, c->execute,
-                             qir_uniform_ui(c, c->loop_break_block->index));
+                jump_block = c->loop_break_block;
                 break;
-
         case nir_jump_continue:
-                qir_SF(c, c->execute);
-                qir_MOV_cond(c, QPU_COND_ZS, c->execute,
-                             qir_uniform_ui(c, c->loop_cont_block->index));
+                jump_block = c->loop_cont_block;
                 break;
-
-        case nir_jump_return:
-                unreachable("All returns shouold be lowered\n");
+        default:
+                unreachable("Unsupported jump type\n");
         }
+
+        qir_SF(c, c->execute);
+        qir_MOV_cond(c, QPU_COND_ZS, c->execute,
+                     qir_uniform_ui(c, jump_block->index));
+
+        /* Jump to the destination block if everyone has taken the jump. */
+        qir_SF(c, qir_SUB(c, c->execute, qir_uniform_ui(c, jump_block->index)));
+        qir_BRANCH(c, QPU_COND_BRANCH_ALL_ZS);
+        struct qblock *new_block = qir_new_block(c);
+        qir_link_blocks(c->cur_block, jump_block);
+        qir_link_blocks(c->cur_block, new_block);
+        qir_set_emit_block(c, new_block);
 }
 
 static void
diff -Nru mesa-13.0.2/src/intel/genxml/gen9_pack.h mesa-13.0.3/src/intel/genxml/gen9_pack.h
--- mesa-13.0.2/src/intel/genxml/gen9_pack.h	2016-11-28 15:16:41.000000000 +0000
+++ mesa-13.0.3/src/intel/genxml/gen9_pack.h	2017-01-05 15:53:01.000000000 +0000
@@ -8220,7 +8220,6 @@
 #define GEN9_MI_SEMAPHORE_WAIT_header           \
    .CommandType                         =      0,  \
    .MICommandOpcode                     =     28,  \
-   .RegisterPollMode                    =      1,  \
    .DWordLength                         =      2
 
 struct GEN9_MI_SEMAPHORE_WAIT {
@@ -8229,7 +8228,7 @@
    uint32_t                             MemoryType;
 #define PerProcessGraphicsAddress                0
 #define GlobalGraphicsAddress                    1
-   uint32_t                             RegisterPollMode;
+   bool                                 RegisterPollMode;
    uint32_t                             WaitMode;
 #define PollingMode                              1
 #define SignalMode                               0
diff -Nru mesa-13.0.2/src/intel/genxml/gen9.xml mesa-13.0.3/src/intel/genxml/gen9.xml
--- mesa-13.0.2/src/intel/genxml/gen9.xml	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/intel/genxml/gen9.xml	2016-12-14 19:03:11.000000000 +0000
@@ -3194,7 +3194,7 @@
       <value name="Per Process Graphics Address" value="0"/>
       <value name="Global Graphics Address" value="1"/>
     </field>
-    <field name="Register Poll Mode" start="16" end="16" type="uint" default="1"/>
+    <field name="Register Poll Mode" start="16" end="16" type="bool"/>
     <field name="Wait Mode" start="15" end="15" type="uint">
       <value name="Polling Mode" value="1"/>
       <value name="Signal Mode" value="0"/>
diff -Nru mesa-13.0.2/src/intel/genxml/gen9_xml.h mesa-13.0.3/src/intel/genxml/gen9_xml.h
--- mesa-13.0.2/src/intel/genxml/gen9_xml.h	2016-11-28 15:16:41.000000000 +0000
+++ mesa-13.0.3/src/intel/genxml/gen9_xml.h	2017-01-05 15:53:01.000000000 +0000
@@ -15247,8 +15247,7 @@
   0x72, 0x20, 0x50, 0x6f, 0x6c, 0x6c, 0x20, 0x4d, 0x6f, 0x64, 0x65, 0x22,
   0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x3d, 0x22, 0x31, 0x36, 0x22, 0x20,
   0x65, 0x6e, 0x64, 0x3d, 0x22, 0x31, 0x36, 0x22, 0x20, 0x74, 0x79, 0x70,
-  0x65, 0x3d, 0x22, 0x75, 0x69, 0x6e, 0x74, 0x22, 0x20, 0x64, 0x65, 0x66,
-  0x61, 0x75, 0x6c, 0x74, 0x3d, 0x22, 0x31, 0x22, 0x2f, 0x3e, 0x0a, 0x20,
+  0x65, 0x3d, 0x22, 0x62, 0x6f, 0x6f, 0x6c, 0x22, 0x2f, 0x3e, 0x0a, 0x20,
   0x20, 0x20, 0x20, 0x3c, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x20, 0x6e, 0x61,
   0x6d, 0x65, 0x3d, 0x22, 0x57, 0x61, 0x69, 0x74, 0x20, 0x4d, 0x6f, 0x64,
   0x65, 0x22, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x3d, 0x22, 0x31, 0x35,
diff -Nru mesa-13.0.2/src/intel/vulkan/anv_allocator.c mesa-13.0.3/src/intel/vulkan/anv_allocator.c
--- mesa-13.0.2/src/intel/vulkan/anv_allocator.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/anv_allocator.c	2016-12-14 19:03:10.000000000 +0000
@@ -246,10 +246,12 @@
 static uint32_t
 anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);
 
-void
+VkResult
 anv_block_pool_init(struct anv_block_pool *pool,
                     struct anv_device *device, uint32_t block_size)
 {
+   VkResult result;
+
    assert(util_is_power_of_two(block_size));
 
    pool->device = device;
@@ -260,17 +262,23 @@
 
    pool->fd = memfd_create("block pool", MFD_CLOEXEC);
    if (pool->fd == -1)
-      return;
+      return vk_error(VK_ERROR_INITIALIZATION_FAILED);
 
    /* Just make it 2GB up-front.  The Linux kernel won't actually back it
     * with pages until we either map and fault on one of them or we use
     * userptr and send a chunk of it off to the GPU.
     */
-   if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1)
-      return;
-
-   u_vector_init(&pool->mmap_cleanups,
-                   round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128);
+   if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1) {
+      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      goto fail_fd;
+   }
+
+   if (!u_vector_init(&pool->mmap_cleanups,
+                      round_to_power_of_two(sizeof(struct anv_mmap_cleanup)),
+                      128)) {
+      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
+      goto fail_fd;
+   }
 
    pool->state.next = 0;
    pool->state.end = 0;
@@ -279,6 +287,13 @@
 
    /* Immediately grow the pool so we'll have a backing bo. */
    pool->state.end = anv_block_pool_grow(pool, &pool->state);
+
+   return VK_SUCCESS;
+
+ fail_fd:
+   close(pool->fd);
+
+   return result;
 }
 
 void
diff -Nru mesa-13.0.2/src/intel/vulkan/anv_descriptor_set.c mesa-13.0.3/src/intel/vulkan/anv_descriptor_set.c
--- mesa-13.0.2/src/intel/vulkan/anv_descriptor_set.c	2016-11-24 16:34:40.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/anv_descriptor_set.c	2016-12-16 14:03:24.000000000 +0000
@@ -498,6 +498,7 @@
       struct surface_state_free_list_entry *entry =
          set->buffer_views[b].surface_state.map;
       entry->next = pool->surface_state_free_list;
+      entry->offset = set->buffer_views[b].surface_state.offset;
       pool->surface_state_free_list = entry;
    }
 
diff -Nru mesa-13.0.2/src/intel/vulkan/anv_device.c mesa-13.0.3/src/intel/vulkan/anv_device.c
--- mesa-13.0.2/src/intel/vulkan/anv_device.c	2016-11-24 16:34:42.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/anv_device.c	2016-12-16 14:03:24.000000000 +0000
@@ -24,6 +24,7 @@
 #include <assert.h>
 #include <stdbool.h>
 #include <string.h>
+#include <sys/mman.h>
 #include <unistd.h>
 #include <fcntl.h>
 
@@ -162,8 +163,6 @@
          device->info.max_cs_threads = max_cs_threads;
    }
 
-   close(fd);
-
    brw_process_intel_debug_variable();
 
    device->compiler = brw_compiler_create(NULL, &device->info);
@@ -175,12 +174,15 @@
    device->compiler->shader_perf_log = compiler_perf_log;
 
    result = anv_init_wsi(device);
-   if (result != VK_SUCCESS)
-       goto fail;
+   if (result != VK_SUCCESS) {
+      ralloc_free(device->compiler);
+      goto fail;
+   }
 
    /* XXX: Actually detect bit6 swizzling */
    isl_device_init(&device->isl_dev, &device->info, swizzled);
 
+   close(fd);
    return VK_SUCCESS;
 
 fail:
@@ -527,7 +529,7 @@
       .maxGeometryTotalOutputComponents         = 1024,
       .maxFragmentInputComponents               = 128,
       .maxFragmentOutputAttachments             = 8,
-      .maxFragmentDualSrcAttachments            = 2,
+      .maxFragmentDualSrcAttachments            = 1,
       .maxFragmentCombinedOutputResources       = 8,
       .maxComputeSharedMemorySize               = 32768,
       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
@@ -967,10 +969,10 @@
 {
    ANV_FROM_HANDLE(anv_device, device, _device);
 
-   anv_queue_finish(&device->queue);
-
    anv_device_finish_blorp(device);
 
+   anv_queue_finish(&device->queue);
+
 #ifdef HAVE_VALGRIND
    /* We only need to free these to prevent valgrind errors.  The backing
     * BO will go away in a couple of lines so we don't actually leak.
@@ -978,22 +980,27 @@
    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
 #endif
 
+   anv_scratch_pool_finish(device, &device->scratch_pool);
+
    anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
    anv_gem_close(device, device->workaround_bo.gem_handle);
 
-   anv_bo_pool_finish(&device->batch_bo_pool);
-   anv_state_pool_finish(&device->dynamic_state_pool);
-   anv_block_pool_finish(&device->dynamic_state_block_pool);
-   anv_state_pool_finish(&device->instruction_state_pool);
-   anv_block_pool_finish(&device->instruction_block_pool);
    anv_state_pool_finish(&device->surface_state_pool);
    anv_block_pool_finish(&device->surface_state_block_pool);
-   anv_scratch_pool_finish(device, &device->scratch_pool);
+   anv_state_pool_finish(&device->instruction_state_pool);
+   anv_block_pool_finish(&device->instruction_block_pool);
+   anv_state_pool_finish(&device->dynamic_state_pool);
+   anv_block_pool_finish(&device->dynamic_state_block_pool);
 
-   close(device->fd);
+   anv_bo_pool_finish(&device->batch_bo_pool);
 
+   pthread_cond_destroy(&device->queue_submit);
    pthread_mutex_destroy(&device->mutex);
 
+   anv_gem_destroy_context(device, device->context_id);
+
+   close(device->fd);
+
    vk_free(&device->alloc, device);
 }
 
@@ -1236,6 +1243,9 @@
 
    mem->type_index = pAllocateInfo->memoryTypeIndex;
 
+   mem->map = NULL;
+   mem->map_size = 0;
+
    *pMem = anv_device_memory_to_handle(mem);
 
    return VK_SUCCESS;
@@ -1257,6 +1267,9 @@
    if (mem == NULL)
       return;
 
+   if (mem->map)
+      anv_UnmapMemory(_device, _mem);
+
    if (mem->bo.map)
       anv_gem_munmap(mem->bo.map, mem->bo.size);
 
@@ -1303,8 +1316,12 @@
    /* Let's map whole pages */
    map_size = align_u64(map_size, 4096);
 
-   mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
-                           map_offset, map_size, gem_flags);
+   void *map = anv_gem_mmap(device, mem->bo.gem_handle,
+                            map_offset, map_size, gem_flags);
+   if (map == MAP_FAILED)
+      return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+
+   mem->map = map;
    mem->map_size = map_size;
 
    *ppData = mem->map + (offset - map_offset);
@@ -1322,6 +1339,9 @@
       return;
 
    anv_gem_munmap(mem->map, mem->map_size);
+
+   mem->map = NULL;
+   mem->map_size = 0;
 }
 
 static void
diff -Nru mesa-13.0.2/src/intel/vulkan/anv_gem.c mesa-13.0.3/src/intel/vulkan/anv_gem.c
--- mesa-13.0.2/src/intel/vulkan/anv_gem.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/anv_gem.c	2016-12-16 14:03:24.000000000 +0000
@@ -88,10 +88,8 @@
    };
 
    int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
-   if (ret != 0) {
-      /* FIXME: Is NULL the right error return? Cf MAP_INVALID */
-      return NULL;
-   }
+   if (ret != 0)
+      return MAP_FAILED;
 
    VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1));
    return (void *)(uintptr_t) gem_mmap.addr_ptr;
diff -Nru mesa-13.0.2/src/intel/vulkan/anv_image.c mesa-13.0.3/src/intel/vulkan/anv_image.c
--- mesa-13.0.2/src/intel/vulkan/anv_image.c	2016-11-24 16:34:40.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/anv_image.c	2016-12-14 19:03:10.000000000 +0000
@@ -194,8 +194,8 @@
          anv_finishme("Test gen8 multisampled HiZ");
       } else {
          isl_surf_get_hiz_surf(&dev->isl_dev, &image->depth_surface.isl,
-                               &image->hiz_surface.isl);
-         add_surface(image, &image->hiz_surface);
+                               &image->aux_surface.isl);
+         add_surface(image, &image->aux_surface);
       }
    }
 
@@ -306,16 +306,16 @@
       /* The offset and size must be a multiple of 4K or else the
        * anv_gem_mmap call below will return NULL.
        */
-      assert((image->offset + image->hiz_surface.offset) % 4096 == 0);
-      assert(image->hiz_surface.isl.size % 4096 == 0);
+      assert((image->offset + image->aux_surface.offset) % 4096 == 0);
+      assert(image->aux_surface.isl.size % 4096 == 0);
 
       /* HiZ surfaces need to have their memory cleared to 0 before they
        * can be used.  If we let it have garbage data, it can cause GPU
        * hangs on some hardware.
        */
       void *map = anv_gem_mmap(device, image->bo->gem_handle,
-                               image->offset + image->hiz_surface.offset,
-                               image->hiz_surface.isl.size,
+                               image->offset + image->aux_surface.offset,
+                               image->aux_surface.isl.size,
                                device->info.has_llc ? 0 : I915_MMAP_WC);
 
       /* If anv_gem_mmap returns NULL, it's likely that the kernel was
@@ -324,9 +324,9 @@
       if (map == NULL)
          return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-      memset(map, 0, image->hiz_surface.isl.size);
+      memset(map, 0, image->aux_surface.isl.size);
 
-      anv_gem_munmap(map, image->hiz_surface.isl.size);
+      anv_gem_munmap(map, image->aux_surface.isl.size);
    }
 
    return VK_SUCCESS;
diff -Nru mesa-13.0.2/src/intel/vulkan/anv_private.h mesa-13.0.3/src/intel/vulkan/anv_private.h
--- mesa-13.0.2/src/intel/vulkan/anv_private.h	2016-11-23 18:56:30.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/anv_private.h	2016-12-14 19:03:10.000000000 +0000
@@ -416,8 +416,8 @@
    anv_clflush_range(state.map, state.alloc_size);
 }
 
-void anv_block_pool_init(struct anv_block_pool *pool,
-                         struct anv_device *device, uint32_t block_size);
+VkResult anv_block_pool_init(struct anv_block_pool *pool,
+                             struct anv_device *device, uint32_t block_size);
 void anv_block_pool_finish(struct anv_block_pool *pool);
 int32_t anv_block_pool_alloc(struct anv_block_pool *pool);
 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool);
@@ -1526,10 +1526,11 @@
 
       struct {
          struct anv_surface depth_surface;
-         struct anv_surface hiz_surface;
          struct anv_surface stencil_surface;
       };
    };
+
+   struct anv_surface aux_surface;
 };
 
 static inline uint32_t
@@ -1593,11 +1594,11 @@
 static inline bool
 anv_image_has_hiz(const struct anv_image *image)
 {
-   /* We must check the aspect because anv_image::hiz_surface belongs to
-    * a union.
+   /* We must check the aspect because anv_image::aux_surface may be used for
+    * any type of auxiliary surface, not just HiZ.
     */
    return (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
-          image->hiz_surface.isl.size > 0;
+          image->aux_surface.isl.size > 0;
 }
 
 struct anv_buffer_view {
diff -Nru mesa-13.0.2/src/intel/vulkan/anv_timestamp.h mesa-13.0.3/src/intel/vulkan/anv_timestamp.h
--- mesa-13.0.2/src/intel/vulkan/anv_timestamp.h	2016-11-28 15:16:44.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/anv_timestamp.h	2017-01-05 15:53:04.000000000 +0000
@@ -1 +1 @@
-#define ANV_TIMESTAMP "1480346204"
+#define ANV_TIMESTAMP "1483631584"
diff -Nru mesa-13.0.2/src/intel/vulkan/genX_cmd_buffer.c mesa-13.0.3/src/intel/vulkan/genX_cmd_buffer.c
--- mesa-13.0.2/src/intel/vulkan/genX_cmd_buffer.c	2016-11-28 12:19:19.000000000 +0000
+++ mesa-13.0.3/src/intel/vulkan/genX_cmd_buffer.c	2016-12-16 14:03:24.000000000 +0000
@@ -1356,22 +1356,13 @@
       result = emit_binding_table(cmd_buffer, MESA_SHADER_COMPUTE, &surfaces);
       assert(result == VK_SUCCESS);
    }
+
    result = emit_samplers(cmd_buffer, MESA_SHADER_COMPUTE, &samplers);
    assert(result == VK_SUCCESS);
 
-
-   struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
-
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
 
-   if (push_state.alloc_size) {
-      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
-         curbe.CURBETotalDataLength    = push_state.alloc_size;
-         curbe.CURBEDataStartAddress   = push_state.offset;
-      }
-   }
-
    const uint32_t slm_size = encode_slm_size(GEN_GEN, prog_data->total_shared);
 
    struct anv_state state =
@@ -1441,6 +1432,18 @@
       cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
    }
 
+   if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
+      struct anv_state push_state =
+         anv_cmd_buffer_cs_push_constants(cmd_buffer);
+
+      if (push_state.alloc_size) {
+         anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
+            curbe.CURBETotalDataLength    = push_state.alloc_size;
+            curbe.CURBEDataStartAddress   = push_state.offset;
+         }
+      }
+   }
+
    cmd_buffer->state.compute_dirty = 0;
 
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -1796,10 +1799,10 @@
    if (has_hiz) {
       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hdb) {
          hdb.HierarchicalDepthBufferObjectControlState = GENX(MOCS);
-         hdb.SurfacePitch = image->hiz_surface.isl.row_pitch - 1;
+         hdb.SurfacePitch = image->aux_surface.isl.row_pitch - 1;
          hdb.SurfaceBaseAddress = (struct anv_address) {
             .bo = image->bo,
-            .offset = image->offset + image->hiz_surface.offset,
+            .offset = image->offset + image->aux_surface.offset,
          };
 #if GEN_GEN >= 8
          /* From the SKL PRM Vol2a:
@@ -1809,11 +1812,14 @@
           *    - SURFTYPE_1D: distance in pixels between array slices
           *    - SURFTYPE_2D/CUBE: distance in rows between array slices
           *    - SURFTYPE_3D: distance in rows between R - slices
+          *
+          * Unfortunately, the docs aren't 100% accurate here.  They fail to
+          * mention that the 1-D rule only applies to linear 1-D images.
+          * Since depth and HiZ buffers are always tiled, they are treated as
+          * 2-D images.  Prior to Sky Lake, this field is always in rows.
           */
          hdb.SurfaceQPitch =
-            image->hiz_surface.isl.dim == ISL_SURF_DIM_1D ?
-               isl_surf_get_array_pitch_el(&image->hiz_surface.isl) >> 2 :
-               isl_surf_get_array_pitch_el_rows(&image->hiz_surface.isl) >> 2;
+            isl_surf_get_array_pitch_el_rows(&image->aux_surface.isl) >> 2;
 #endif
       }
    } else {
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
--- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp	2016-12-24 13:03:53.000000000 +0000
@@ -129,7 +129,7 @@
          foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
             acp[next_acp] = entry;
 
-            /* opt_copy_propagate_local populates out_acp with copies created
+            /* opt_copy_propagation_local populates out_acp with copies created
              * in a block which are still live at the end of the block.  This
              * is exactly what we want in the COPY set.
              */
@@ -431,7 +431,9 @@
    if (entry->saturate) {
       switch(inst->opcode) {
       case BRW_OPCODE_SEL:
-         if (inst->src[1].file != IMM ||
+         if ((inst->conditional_mod != BRW_CONDITIONAL_GE &&
+              inst->conditional_mod != BRW_CONDITIONAL_L) ||
+             inst->src[1].file != IMM ||
              inst->src[1].f < 0.0 ||
              inst->src[1].f > 1.0) {
             return false;
@@ -735,8 +737,8 @@
  * list.
  */
 bool
-fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
-                                     exec_list *acp)
+fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
+                                       exec_list *acp)
 {
    bool progress = false;
 
@@ -819,7 +821,7 @@
 }
 
 bool
-fs_visitor::opt_copy_propagate()
+fs_visitor::opt_copy_propagation()
 {
    bool progress = false;
    void *copy_prop_ctx = ralloc_context(NULL);
@@ -832,8 +834,8 @@
     * the set of copies available at the end of the block.
     */
    foreach_block (block, cfg) {
-      progress = opt_copy_propagate_local(copy_prop_ctx, block,
-                                          out_acp[block->num]) || progress;
+      progress = opt_copy_propagation_local(copy_prop_ctx, block,
+                                            out_acp[block->num]) || progress;
    }
 
    /* Do dataflow analysis for those available copies. */
@@ -852,7 +854,8 @@
          }
       }
 
-      progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress;
+      progress = opt_copy_propagation_local(copy_prop_ctx, block, in_acp) ||
+                 progress;
    }
 
    for (int i = 0; i < cfg->num_blocks; i++)
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.cpp
--- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.cpp	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.cpp	2016-12-16 14:03:24.000000000 +0000
@@ -5692,7 +5692,7 @@
 
       OPT(opt_algebraic);
       OPT(opt_cse);
-      OPT(opt_copy_propagate);
+      OPT(opt_copy_propagation);
       OPT(opt_predicated_break, this);
       OPT(opt_cmod_propagation);
       OPT(dead_code_eliminate);
@@ -5716,7 +5716,7 @@
    }
 
    if (OPT(lower_d2x)) {
-      OPT(opt_copy_propagate);
+      OPT(opt_copy_propagation);
       OPT(dead_code_eliminate);
    }
 
@@ -5728,12 +5728,12 @@
    OPT(lower_logical_sends);
 
    if (progress) {
-      OPT(opt_copy_propagate);
+      OPT(opt_copy_propagation);
       /* Only run after logical send lowering because it's easier to implement
        * in terms of physical sends.
        */
       if (OPT(opt_zero_samples))
-         OPT(opt_copy_propagate);
+         OPT(opt_copy_propagation);
       /* Run after logical send lowering to give it a chance to CSE the
        * LOAD_PAYLOAD instructions created to construct the payloads of
        * e.g. texturing messages in cases where it wasn't possible to CSE the
@@ -5762,7 +5762,7 @@
    if (devinfo->gen <= 5 && OPT(lower_minmax)) {
       OPT(opt_cmod_propagation);
       OPT(opt_cse);
-      OPT(opt_copy_propagate);
+      OPT(opt_copy_propagation);
       OPT(dead_code_eliminate);
    }
 
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.h mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.h
--- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_fs.h	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_fs.h	2016-12-16 14:03:24.000000000 +0000
@@ -133,11 +133,11 @@
    bool opt_redundant_discard_jumps();
    bool opt_cse();
    bool opt_cse_local(bblock_t *block);
-   bool opt_copy_propagate();
+   bool opt_copy_propagation();
    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
-   bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
-                                 exec_list *acp);
+   bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
+                                   exec_list *acp);
    bool opt_drop_redundant_mov_to_flags();
    bool opt_register_renaming();
    bool register_coalesce();
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
--- mesa-13.0.2/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp	2016-11-24 16:34:39.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp	2016-12-14 19:03:11.000000000 +0000
@@ -780,7 +780,13 @@
    if (compiler->devinfo->gen >= 8)
       output_size_bytes += 32;
 
-   assert(output_size_bytes >= 1);
+   /* Shaders can technically set max_vertices = 0, at which point we
+    * may have a URB size of 0 bytes.  Nothing good can come from that,
+    * so enforce a minimum size.
+    */
+   if (output_size_bytes == 0)
+      output_size_bytes = 1;
+
    unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES;
    if (compiler->devinfo->gen == 6)
       max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/intel_mipmap_tree.c mesa-13.0.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
--- mesa-13.0.2/src/mesa/drivers/dri/i965/intel_mipmap_tree.c	2016-11-10 22:06:40.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/intel_mipmap_tree.c	2016-12-16 14:03:24.000000000 +0000
@@ -2159,6 +2159,8 @@
       intel_miptree_release(&mt->mcs_mt);
       mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
    }
+
+   mt->disable_aux_buffers = true;
 }
 
 
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.am mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.am
--- mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.am	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.am	2016-12-16 14:03:24.000000000 +0000
@@ -106,6 +106,7 @@
 
 TESTS = \
 	test_fs_cmod_propagation \
+	test_fs_copy_propagation \
 	test_fs_saturate_propagation \
         test_eu_compact \
 	test_vf_float_conversions \
@@ -121,6 +122,12 @@
 	$(top_builddir)/src/gtest/libgtest.la \
 	$(TEST_LIBS)
 
+test_fs_copy_propagation_SOURCES = \
+	test_fs_copy_propagation.cpp
+test_fs_copy_propagation_LDADD = \
+	$(top_builddir)/src/gtest/libgtest.la \
+	$(TEST_LIBS)
+
 test_fs_saturate_propagation_SOURCES = \
 	test_fs_saturate_propagation.cpp
 test_fs_saturate_propagation_LDADD = \
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.in mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.in
--- mesa-13.0.2/src/mesa/drivers/dri/i965/Makefile.in	2016-11-28 15:16:07.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/Makefile.in	2017-01-05 15:52:27.000000000 +0000
@@ -111,6 +111,7 @@
 host_triplet = @host@
 target_triplet = @target@
 TESTS = test_fs_cmod_propagation$(EXEEXT) \
+	test_fs_copy_propagation$(EXEEXT) \
 	test_fs_saturate_propagation$(EXEEXT) test_eu_compact$(EXEEXT) \
 	test_vf_float_conversions$(EXEEXT) \
 	test_vec4_cmod_propagation$(EXEEXT) \
@@ -262,6 +263,7 @@
 	$(libi965_gen9_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) \
 	-o $@
 am__EXEEXT_1 = test_fs_cmod_propagation$(EXEEXT) \
+	test_fs_copy_propagation$(EXEEXT) \
 	test_fs_saturate_propagation$(EXEEXT) test_eu_compact$(EXEEXT) \
 	test_vf_float_conversions$(EXEEXT) \
 	test_vec4_cmod_propagation$(EXEEXT) \
@@ -281,6 +283,12 @@
 	$(am_test_fs_cmod_propagation_OBJECTS)
 test_fs_cmod_propagation_DEPENDENCIES =  \
 	$(top_builddir)/src/gtest/libgtest.la $(am__DEPENDENCIES_2)
+am_test_fs_copy_propagation_OBJECTS =  \
+	test_fs_copy_propagation.$(OBJEXT)
+test_fs_copy_propagation_OBJECTS =  \
+	$(am_test_fs_copy_propagation_OBJECTS)
+test_fs_copy_propagation_DEPENDENCIES =  \
+	$(top_builddir)/src/gtest/libgtest.la $(am__DEPENDENCIES_2)
 am_test_fs_saturate_propagation_OBJECTS =  \
 	test_fs_saturate_propagation.$(OBJEXT)
 test_fs_saturate_propagation_OBJECTS =  \
@@ -369,6 +377,7 @@
 	$(libi965_gen9_la_SOURCES) $(test_eu_compact_SOURCES) \
 	$(nodist_EXTRA_test_eu_compact_SOURCES) \
 	$(test_fs_cmod_propagation_SOURCES) \
+	$(test_fs_copy_propagation_SOURCES) \
 	$(test_fs_saturate_propagation_SOURCES) \
 	$(test_vec4_cmod_propagation_SOURCES) \
 	$(test_vec4_copy_propagation_SOURCES) \
@@ -379,6 +388,7 @@
 	$(libi965_gen7_la_SOURCES) $(libi965_gen75_la_SOURCES) \
 	$(libi965_gen8_la_SOURCES) $(libi965_gen9_la_SOURCES) \
 	$(test_eu_compact_SOURCES) $(test_fs_cmod_propagation_SOURCES) \
+	$(test_fs_copy_propagation_SOURCES) \
 	$(test_fs_saturate_propagation_SOURCES) \
 	$(test_vec4_cmod_propagation_SOURCES) \
 	$(test_vec4_copy_propagation_SOURCES) \
@@ -1260,6 +1270,13 @@
 	$(top_builddir)/src/gtest/libgtest.la \
 	$(TEST_LIBS)
 
+test_fs_copy_propagation_SOURCES = \
+	test_fs_copy_propagation.cpp
+
+test_fs_copy_propagation_LDADD = \
+	$(top_builddir)/src/gtest/libgtest.la \
+	$(TEST_LIBS)
+
 test_fs_saturate_propagation_SOURCES = \
 	test_fs_saturate_propagation.cpp
 
@@ -1385,6 +1402,10 @@
 	@rm -f test_fs_cmod_propagation$(EXEEXT)
 	$(AM_V_CXXLD)$(CXXLINK) $(test_fs_cmod_propagation_OBJECTS) $(test_fs_cmod_propagation_LDADD) $(LIBS)
 
+test_fs_copy_propagation$(EXEEXT): $(test_fs_copy_propagation_OBJECTS) $(test_fs_copy_propagation_DEPENDENCIES) $(EXTRA_test_fs_copy_propagation_DEPENDENCIES) 
+	@rm -f test_fs_copy_propagation$(EXEEXT)
+	$(AM_V_CXXLD)$(CXXLINK) $(test_fs_copy_propagation_OBJECTS) $(test_fs_copy_propagation_LDADD) $(LIBS)
+
 test_fs_saturate_propagation$(EXEEXT): $(test_fs_saturate_propagation_OBJECTS) $(test_fs_saturate_propagation_DEPENDENCIES) $(EXTRA_test_fs_saturate_propagation_DEPENDENCIES) 
 	@rm -f test_fs_saturate_propagation$(EXEEXT)
 	$(AM_V_CXXLD)$(CXXLINK) $(test_fs_saturate_propagation_OBJECTS) $(test_fs_saturate_propagation_LDADD) $(LIBS)
@@ -1611,6 +1632,7 @@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libi965_gen9_la-genX_blorp_exec.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_eu_compact.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_fs_cmod_propagation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_fs_copy_propagation.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_fs_saturate_propagation.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_vec4_cmod_propagation.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_vec4_copy_propagation.Po@am__quote@
@@ -1905,6 +1927,13 @@
 	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
 	--log-file $$b.log --trs-file $$b.trs \
 	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
+	"$$tst" $(AM_TESTS_FD_REDIRECT)
+test_fs_copy_propagation.log: test_fs_copy_propagation$(EXEEXT)
+	@p='test_fs_copy_propagation$(EXEEXT)'; \
+	b='test_fs_copy_propagation'; \
+	$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
+	--log-file $$b.log --trs-file $$b.trs \
+	$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
 	"$$tst" $(AM_TESTS_FD_REDIRECT)
 test_fs_saturate_propagation.log: test_fs_saturate_propagation$(EXEEXT)
 	@p='test_fs_saturate_propagation$(EXEEXT)'; \
diff -Nru mesa-13.0.2/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp mesa-13.0.3/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp
--- mesa-13.0.2/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp	1970-01-01 00:00:00.000000000 +0000
+++ mesa-13.0.3/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp	2016-12-24 13:03:53.000000000 +0000
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <gtest/gtest.h>
+#include "brw_fs.h"
+#include "brw_cfg.h"
+#include "program/program.h"
+
+using namespace brw;
+
+class copy_propagation_test : public ::testing::Test {
+   virtual void SetUp();
+
+public:
+   struct brw_compiler *compiler;
+   struct gen_device_info *devinfo;
+   struct gl_context *ctx;
+   struct brw_wm_prog_data *prog_data;
+   struct gl_shader_program *shader_prog;
+   fs_visitor *v;
+};
+
+class copy_propagation_fs_visitor : public fs_visitor
+{
+public:
+   copy_propagation_fs_visitor(struct brw_compiler *compiler,
+                               struct brw_wm_prog_data *prog_data,
+                               nir_shader *shader)
+      : fs_visitor(compiler, NULL, NULL, NULL,
+                   &prog_data->base, (struct gl_program *) NULL,
+                   shader, 8, -1) {}
+};
+
+
+void copy_propagation_test::SetUp()
+{
+   ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
+   compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
+   devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo));
+   compiler->devinfo = devinfo;
+
+   prog_data = ralloc(NULL, struct brw_wm_prog_data);
+   nir_shader *shader =
+      nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
+
+   v = new copy_propagation_fs_visitor(compiler, prog_data, shader);
+
+   devinfo->gen = 4;
+}
+
+static fs_inst *
+instruction(bblock_t *block, int num)
+{
+   fs_inst *inst = (fs_inst *)block->start();
+   for (int i = 0; i < num; i++) {
+      inst = (fs_inst *)inst->next;
+   }
+   return inst;
+}
+
+static bool
+copy_propagation(fs_visitor *v)
+{
+   const bool print = getenv("TEST_DEBUG");
+
+   if (print) {
+      fprintf(stderr, "= Before =\n");
+      v->cfg->dump(v);
+   }
+
+   bool ret = v->opt_copy_propagation();
+
+   if (print) {
+      fprintf(stderr, "\n= After =\n");
+      v->cfg->dump(v);
+   }
+
+   return ret;
+}
+
+TEST_F(copy_propagation_test, basic)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg vgrf0 = v->vgrf(glsl_type::float_type);
+   fs_reg vgrf1 = v->vgrf(glsl_type::float_type);
+   fs_reg vgrf2 = v->vgrf(glsl_type::float_type);
+   fs_reg vgrf3 = v->vgrf(glsl_type::float_type);
+   bld.MOV(vgrf0, vgrf2);
+   bld.ADD(vgrf1, vgrf0, vgrf3);
+
+   /* = Before =
+    *
+    * 0: mov(8)        vgrf0  vgrf2
+    * 1: add(8)        vgrf1  vgrf0  vgrf3
+    *
+    * = After =
+    * 0: mov(8)        vgrf0  vgrf2
+    * 1: add(8)        vgrf1  vgrf2  vgrf3
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+
+   EXPECT_TRUE(copy_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+
+   fs_inst *mov = instruction(block0, 0);
+   EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
+   EXPECT_TRUE(mov->dst.equals(vgrf0));
+   EXPECT_TRUE(mov->src[0].equals(vgrf2));
+
+   fs_inst *add = instruction(block0, 1);
+   EXPECT_EQ(BRW_OPCODE_ADD, add->opcode);
+   EXPECT_TRUE(add->dst.equals(vgrf1));
+   EXPECT_TRUE(add->src[0].equals(vgrf2));
+   EXPECT_TRUE(add->src[1].equals(vgrf3));
+}
+
+TEST_F(copy_propagation_test, maxmax_sat_imm)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg vgrf0 = v->vgrf(glsl_type::float_type);
+   fs_reg vgrf1 = v->vgrf(glsl_type::float_type);
+   fs_reg vgrf2 = v->vgrf(glsl_type::float_type);
+
+   static const struct {
+      enum brw_conditional_mod conditional_mod;
+      float immediate;
+      bool expected_result;
+   } test[] = {
+      /*   conditional mod,     imm, expected_result */
+      { BRW_CONDITIONAL_GE  ,  0.1f, true },
+      { BRW_CONDITIONAL_L   ,  0.1f, true },
+      { BRW_CONDITIONAL_GE  ,  0.5f, true },
+      { BRW_CONDITIONAL_L   ,  0.5f, true },
+      { BRW_CONDITIONAL_GE  ,  0.9f, true },
+      { BRW_CONDITIONAL_L   ,  0.9f, true },
+      { BRW_CONDITIONAL_GE  , -1.5f, false },
+      { BRW_CONDITIONAL_L   , -1.5f, false },
+      { BRW_CONDITIONAL_GE  ,  1.5f, false },
+      { BRW_CONDITIONAL_L   ,  1.5f, false },
+
+      { BRW_CONDITIONAL_NONE, 0.5f, false },
+      { BRW_CONDITIONAL_Z   , 0.5f, false },
+      { BRW_CONDITIONAL_NZ  , 0.5f, false },
+      { BRW_CONDITIONAL_G   , 0.5f, false },
+      { BRW_CONDITIONAL_LE  , 0.5f, false },
+      { BRW_CONDITIONAL_R   , 0.5f, false },
+      { BRW_CONDITIONAL_O   , 0.5f, false },
+      { BRW_CONDITIONAL_U   , 0.5f, false },
+   };
+
+   for (unsigned i = 0; i < sizeof(test) / sizeof(test[0]); i++) {
+      fs_inst *mov = set_saturate(true, bld.MOV(vgrf0, vgrf1));
+      fs_inst *sel = set_condmod(test[i].conditional_mod,
+                                 bld.SEL(vgrf2, vgrf0,
+                                         brw_imm_f(test[i].immediate)));
+
+      v->calculate_cfg();
+
+      bblock_t *block0 = v->cfg->blocks[0];
+
+      EXPECT_EQ(0, block0->start_ip);
+      EXPECT_EQ(1, block0->end_ip);
+
+      EXPECT_EQ(test[i].expected_result, copy_propagation(v));
+      EXPECT_EQ(0, block0->start_ip);
+      EXPECT_EQ(1, block0->end_ip);
+
+      EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
+      EXPECT_TRUE(mov->saturate);
+      EXPECT_TRUE(mov->dst.equals(vgrf0));
+      EXPECT_TRUE(mov->src[0].equals(vgrf1));
+
+      EXPECT_EQ(BRW_OPCODE_SEL, sel->opcode);
+      EXPECT_EQ(test[i].conditional_mod, sel->conditional_mod);
+      EXPECT_EQ(test[i].expected_result, sel->saturate);
+      EXPECT_TRUE(sel->dst.equals(vgrf2));
+      if (test[i].expected_result) {
+         EXPECT_TRUE(sel->src[0].equals(vgrf1));
+      } else {
+         EXPECT_TRUE(sel->src[0].equals(vgrf0));
+      }
+      EXPECT_TRUE(sel->src[1].equals(brw_imm_f(test[i].immediate)));
+
+      delete v->cfg;
+      v->cfg = NULL;
+   }
+}
diff -Nru mesa-13.0.2/src/mesa/main/api_validate.c mesa-13.0.3/src/mesa/main/api_validate.c
--- mesa-13.0.2/src/mesa/main/api_validate.c	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/mesa/main/api_validate.c	2016-12-14 19:03:11.000000000 +0000
@@ -925,7 +925,7 @@
     * buffer bound.
     */
    if (_mesa_is_gles31(ctx) &&
-       ctx->Array.VAO->_Enabled != ctx->Array.VAO->VertexAttribBufferMask) {
+       ctx->Array.VAO->_Enabled & ~ctx->Array.VAO->VertexAttribBufferMask) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(No VBO bound)", name);
       return GL_FALSE;
    }
diff -Nru mesa-13.0.2/src/mesa/main/fbobject.c mesa-13.0.3/src/mesa/main/fbobject.c
--- mesa-13.0.2/src/mesa/main/fbobject.c	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/mesa/main/fbobject.c	2016-12-16 14:03:24.000000000 +0000
@@ -2850,6 +2850,7 @@
    dst_att->Type = src_att->Type;
    dst_att->Complete = src_att->Complete;
    dst_att->TextureLevel = src_att->TextureLevel;
+   dst_att->CubeMapFace = src_att->CubeMapFace;
    dst_att->Zoffset = src_att->Zoffset;
    dst_att->Layered = src_att->Layered;
 }
diff -Nru mesa-13.0.2/src/mesa/main/program_resource.c mesa-13.0.3/src/mesa/main/program_resource.c
--- mesa-13.0.2/src/mesa/main/program_resource.c	2016-11-10 22:05:17.000000000 +0000
+++ mesa-13.0.3/src/mesa/main/program_resource.c	2016-12-14 19:03:10.000000000 +0000
@@ -67,9 +67,7 @@
 }
 
 static struct gl_shader_program *
-lookup_linked_program(GLuint program,
-                      const char *caller,
-                      bool raise_link_error)
+lookup_linked_program(GLuint program, const char *caller)
 {
    GET_CURRENT_CONTEXT(ctx);
    struct gl_shader_program *prog =
@@ -79,66 +77,13 @@
       return NULL;
 
    if (prog->LinkStatus == GL_FALSE) {
-      if (raise_link_error)
-         _mesa_error(ctx, GL_INVALID_OPERATION, "%s(program not linked)",
-                     caller);
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(program not linked)",
+                  caller);
       return NULL;
    }
    return prog;
 }
 
-static GLenum
-stage_from_program_interface(GLenum programInterface)
-{
-   switch(programInterface) {
-   case GL_VERTEX_SUBROUTINE_UNIFORM:
-      return MESA_SHADER_VERTEX;
-   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
-      return MESA_SHADER_TESS_CTRL;
-   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
-      return MESA_SHADER_TESS_EVAL;
-   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
-      return MESA_SHADER_GEOMETRY;
-   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
-      return MESA_SHADER_FRAGMENT;
-   case GL_COMPUTE_SUBROUTINE_UNIFORM:
-      return MESA_SHADER_COMPUTE;
-   default:
-      unreachable("unexpected programInterface value");
-   }
-}
-
-static struct gl_linked_shader *
-lookup_linked_shader(GLuint program,
-                     GLenum programInterface,
-                     const char *caller)
-{
-   struct gl_shader_program *shLinkedProg =
-      lookup_linked_program(program, caller, false);
-   gl_shader_stage stage = stage_from_program_interface(programInterface);
-
-   if (!shLinkedProg)
-      return NULL;
-
-   return shLinkedProg->_LinkedShaders[stage];
-}
-
-static bool
-is_subroutine_uniform_program_interface(GLenum programInterface)
-{
-   switch(programInterface) {
-   case GL_VERTEX_SUBROUTINE_UNIFORM:
-   case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
-   case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
-   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
-   case GL_FRAGMENT_SUBROUTINE_UNIFORM:
-   case GL_COMPUTE_SUBROUTINE_UNIFORM:
-      return true;
-   default:
-      return false;
-   }
-}
-
 void GLAPIENTRY
 _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
                             GLenum pname, GLint *params)
@@ -174,49 +119,9 @@
    /* Validate pname against interface. */
    switch(pname) {
    case GL_ACTIVE_RESOURCES:
-      if (is_subroutine_uniform_program_interface(programInterface)) {
-         /* ARB_program_interface_query doesn't explicitly says that those
-          * uniforms would need a linked shader, or that should fail if it is
-          * not the case, but Section 7.6 (Uniform Variables) of the OpenGL
-          * 4.4 Core Profile says:
-          *
-          *    "A uniform is considered an active uniform if the compiler and
-          *     linker determine that the uniform will actually be accessed
-          *     when the executable code is executed. In cases where the
-          *     compiler and linker cannot make a conclusive determination,
-          *     the uniform will be considered active."
-          *
-          * So in order to know the real number of active subroutine uniforms
-          * we would need a linked shader .
-          *
-          * At the same time, Section 7.3 (Program Objects) of the OpenGL 4.4
-          * Core Profile says:
-          *
-          *    "The GL provides various commands allowing applications to
-          *     enumerate and query properties of active variables and in-
-          *     terface blocks for a specified program. If one of these
-          *     commands is called with a program for which LinkProgram
-          *     succeeded, the information recorded when the program was
-          *     linked is returned. If one of these commands is called with a
-          *     program for which LinkProgram failed, no error is generated
-          *     unless otherwise noted."
-          *     <skip>
-          *    "If one of these commands is called with a program for which
-          *     LinkProgram had never been called, no error is generated
-          *     unless otherwise noted, and the program object is considered
-          *     to have no active variables or interface blocks."
-          *
-          * So if the program is not linked we will return 0.
-          */
-         struct gl_linked_shader *sh =
-            lookup_linked_shader(program, programInterface, "glGetProgramInterfaceiv");
-
-         *params = sh ? sh->NumSubroutineUniforms : 0;
-      } else {
-         for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++)
-            if (shProg->ProgramResourceList[i].Type == programInterface)
-               (*params)++;
-      }
+      for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++)
+         if (shProg->ProgramResourceList[i].Type == programInterface)
+            (*params)++;
       break;
    case GL_MAX_NAME_LENGTH:
       if (programInterface == GL_ATOMIC_COUNTER_BUFFER ||
@@ -500,7 +405,7 @@
    }
 
    struct gl_shader_program *shProg =
-      lookup_linked_program(program, "glGetProgramResourceLocation", true);
+      lookup_linked_program(program, "glGetProgramResourceLocation");
 
    if (!shProg || !name)
       return -1;
@@ -556,7 +461,7 @@
    }
 
    struct gl_shader_program *shProg =
-      lookup_linked_program(program, "glGetProgramResourceLocationIndex", true);
+      lookup_linked_program(program, "glGetProgramResourceLocationIndex");
 
    if (!shProg || !name)
       return -1;
diff -Nru mesa-13.0.2/src/vulkan/wsi/wsi_common_queue.h mesa-13.0.3/src/vulkan/wsi/wsi_common_queue.h
--- mesa-13.0.2/src/vulkan/wsi/wsi_common_queue.h	2016-11-28 12:18:12.000000000 +0000
+++ mesa-13.0.3/src/vulkan/wsi/wsi_common_queue.h	2016-12-14 19:03:10.000000000 +0000
@@ -65,6 +65,7 @@
    if (ret)
       goto fail_cond;
 
+   pthread_condattr_destroy(&condattr);
    return 0;
 
 fail_cond:
diff -Nru mesa-13.0.2/VERSION mesa-13.0.3/VERSION
--- mesa-13.0.2/VERSION	2016-11-28 15:02:37.000000000 +0000
+++ mesa-13.0.3/VERSION	2017-01-05 15:49:50.000000000 +0000
@@ -1 +1 @@
-13.0.2
+13.0.3